[llvm] r314082 - [X86] Add tests to show missed opportunities to fold broadcast loads into IFMA instructions when the load is on operand1 of the instrinsic.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 24 12:30:54 PDT 2017


Author: ctopper
Date: Sun Sep 24 12:30:54 2017
New Revision: 314082

URL: http://llvm.org/viewvc/llvm-project?rev=314082&view=rev
Log:
[X86] Add tests to show missed opportunities to fold broadcast loads into IFMA instructions when the load is on operand1 of the instrinsic.

We need to enable commuting during isel to catch this since the load folding tables can't handle broadcasts.

Modified:
    llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll

Modified: llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll?rev=314082&r1=314081&r2=314082&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll Sun Sep 24 12:30:54 2017
@@ -124,6 +124,19 @@ define <8 x i64>@test_int_x86_avx512_vpm
   ret <8 x i64> %res
 }
 
+define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr) {
+; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+
+  %x2load = load i64, i64* %x2ptr
+  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
+  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
+  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+  ret <8 x i64> %res
+}
+
 define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
 ; CHECK:       ## BB#0:
@@ -135,6 +148,20 @@ define <8 x i64>@test_int_x86_avx512_vpm
   ret <8 x i64> %res
 }
 
+define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2) {
+; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm2
+; CHECK-NEXT:    vpmadd52huq %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+
+  %x1load = load i64, i64* %x1ptr
+  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
+  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
+  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+  ret <8 x i64> %res
+}
+
 define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
 ; CHECK:       ## BB#0:
@@ -147,6 +174,20 @@ define <8 x i64>@test_int_x86_avx512_mas
   ret <8 x i64> %res
 }
 
+define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT:    retq
+
+  %x2load = load i64, i64* %x2ptr
+  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
+  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
+  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+  ret <8 x i64> %res
+}
+
 define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
 ; CHECK:       ## BB#0:
@@ -159,6 +200,21 @@ define <8 x i64>@test_int_x86_avx512_mas
   ret <8 x i64> %res
 }
 
+define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm2
+; CHECK-NEXT:    vpmadd52huq %zmm1, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT:    retq
+
+  %x1load = load i64, i64* %x1ptr
+  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
+  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
+  %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+  ret <8 x i64> %res
+}
+
 define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
 ; CHECK:       ## BB#0:
@@ -171,6 +227,20 @@ define <8 x i64>@test_int_x86_avx512_mas
   ret <8 x i64> %res
 }
 
+define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+
+  %x2load = load i64, i64* %x2ptr
+  %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
+  %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
+  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+  ret <8 x i64> %res
+}
+
 define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
 ; CHECK:       ## BB#0:
@@ -182,3 +252,18 @@ define <8 x i64>@test_int_x86_avx512_mas
   %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
   ret <8 x i64> %res
 }
+
+define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm2
+; CHECK-NEXT:    vpmadd52huq %zmm1, %zmm2, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+
+  %x1load = load i64, i64* %x1ptr
+  %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
+  %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
+  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+  ret <8 x i64> %res
+}




More information about the llvm-commits mailing list