[llvm] r314082 - [X86] Add tests to show missed opportunities to fold broadcast loads into IFMA instructions when the load is on operand1 of the intrinsic.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 24 12:30:54 PDT 2017
Author: ctopper
Date: Sun Sep 24 12:30:54 2017
New Revision: 314082
URL: http://llvm.org/viewvc/llvm-project?rev=314082&view=rev
Log:
[X86] Add tests to show missed opportunities to fold broadcast loads into IFMA instructions when the load is on operand1 of the intrinsic.
We need to enable commuting during isel to catch this since the load folding tables can't handle broadcasts.
Modified:
llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll
Modified: llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll?rev=314082&r1=314081&r2=314082&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll Sun Sep 24 12:30:54 2017
@@ -124,6 +124,19 @@ define <8 x i64>@test_int_x86_avx512_vpm
ret <8 x i64> %res
}
+define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr) {
+; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0
+; CHECK-NEXT: retq
+
+ %x2load = load i64, i64* %x2ptr
+ %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
+ %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ ret <8 x i64> %res
+}
+
define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute:
; CHECK: ## BB#0:
@@ -135,6 +148,20 @@ define <8 x i64>@test_int_x86_avx512_vpm
ret <8 x i64> %res
}
+define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2) {
+; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm2
+; CHECK-NEXT: vpmadd52huq %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: retq
+
+ %x1load = load i64, i64* %x1ptr
+ %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
+ %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ ret <8 x i64> %res
+}
+
define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load:
; CHECK: ## BB#0:
@@ -147,6 +174,20 @@ define <8 x i64>@test_int_x86_avx512_mas
ret <8 x i64> %res
}
+define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+
+ %x2load = load i64, i64* %x2ptr
+ %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
+ %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute:
; CHECK: ## BB#0:
@@ -159,6 +200,21 @@ define <8 x i64>@test_int_x86_avx512_mas
ret <8 x i64> %res
}
+define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm2
+; CHECK-NEXT: vpmadd52huq %zmm1, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: retq
+
+ %x1load = load i64, i64* %x1ptr
+ %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
+ %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load:
; CHECK: ## BB#0:
@@ -171,6 +227,20 @@ define <8 x i64>@test_int_x86_avx512_mas
ret <8 x i64> %res
}
+define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast(<8 x i64> %x0, <8 x i64> %x1, i64* %x2ptr, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmadd52huq (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+
+ %x2load = load i64, i64* %x2ptr
+ %x2insert = insertelement <8 x i64> undef, i64 %x2load, i64 0
+ %x2 = shufflevector <8 x i64> %x2insert, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute:
; CHECK: ## BB#0:
@@ -182,3 +252,18 @@ define <8 x i64>@test_int_x86_avx512_mas
%res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
ret <8 x i64> %res
}
+
+define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast(<8 x i64> %x0, i64* %x1ptr, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpbroadcastq (%rdi), %zmm2
+; CHECK-NEXT: vpmadd52huq %zmm1, %zmm2, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+
+ %x1load = load i64, i64* %x1ptr
+ %x1insert = insertelement <8 x i64> undef, i64 %x1load, i64 0
+ %x1 = shufflevector <8 x i64> %x1insert, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
More information about the llvm-commits
mailing list