[llvm] r373138 - [X86] Add broadcast load unfolding support for VPTESTMD/Q and VPTESTNMD/Q.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 27 18:56:36 PDT 2019
Author: ctopper
Date: Fri Sep 27 18:56:36 2019
New Revision: 373138
URL: http://llvm.org/viewvc/llvm-project?rev=373138&view=rev
Log:
[X86] Add broadcast load unfolding support for VPTESTMD/Q and VPTESTNMD/Q.
Modified:
llvm/trunk/lib/Target/X86/X86InstrFoldTables.cpp
llvm/trunk/test/CodeGen/X86/avx512-broadcast-unfold.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrFoldTables.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFoldTables.cpp?rev=373138&r1=373137&r2=373138&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFoldTables.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFoldTables.cpp Fri Sep 27 18:56:36 2019
@@ -5374,6 +5374,18 @@ static const X86MemoryFoldTableEntry Bro
{ X86::VPORQZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q },
{ X86::VPORQZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q },
{ X86::VPORQZrr, X86::VPORQZrmb, TB_BCAST_Q },
+ { X86::VPTESTMDZ128rr, X86::VPTESTMDZ128rmb, TB_BCAST_D },
+ { X86::VPTESTMDZ256rr, X86::VPTESTMDZ256rmb, TB_BCAST_D },
+ { X86::VPTESTMDZrr, X86::VPTESTMDZrmb, TB_BCAST_D },
+ { X86::VPTESTMQZ128rr, X86::VPTESTMQZ128rmb, TB_BCAST_Q },
+ { X86::VPTESTMQZ256rr, X86::VPTESTMQZ256rmb, TB_BCAST_Q },
+ { X86::VPTESTMQZrr, X86::VPTESTMQZrmb, TB_BCAST_Q },
+ { X86::VPTESTNMDZ128rr,X86::VPTESTNMDZ128rmb,TB_BCAST_D },
+ { X86::VPTESTNMDZ256rr,X86::VPTESTNMDZ256rmb,TB_BCAST_D },
+ { X86::VPTESTNMDZrr, X86::VPTESTNMDZrmb, TB_BCAST_D },
+ { X86::VPTESTNMQZ128rr,X86::VPTESTNMQZ128rmb,TB_BCAST_Q },
+ { X86::VPTESTNMQZ256rr,X86::VPTESTNMQZ256rmb,TB_BCAST_Q },
+ { X86::VPTESTNMQZrr, X86::VPTESTNMQZrmb, TB_BCAST_Q },
{ X86::VPXORDZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D },
{ X86::VPXORDZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D },
{ X86::VPXORDZrr, X86::VPXORDZrmb, TB_BCAST_D },
Modified: llvm/trunk/test/CodeGen/X86/avx512-broadcast-unfold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-broadcast-unfold.ll?rev=373138&r1=373137&r2=373138&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-broadcast-unfold.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-broadcast-unfold.ll Fri Sep 27 18:56:36 2019
@@ -4483,3 +4483,153 @@ define void @bcast_unfold_cmp_v8f32_refo
12: ; preds = %2
ret void
}
+
+define void @bcast_unfold_ptestm_v4i32(i32* %arg) {
+; CHECK-LABEL: bcast_unfold_ptestm_v4i32:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB127_1: # %bb1
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm1
+; CHECK-NEXT: vptestmd %xmm0, %xmm1, %k1
+; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1 {%k1}
+; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax)
+; CHECK-NEXT: addq $16, %rax
+; CHECK-NEXT: jne .LBB127_1
+; CHECK-NEXT: # %bb.2: # %bb10
+; CHECK-NEXT: retq
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ]
+ %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp
+ %tmp3 = bitcast i32* %tmp2 to <4 x i32>*
+ %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4
+ %tmp4b = and <4 x i32> %tmp4, <i32 2, i32 2, i32 2, i32 2>
+ %tmp5 = icmp ne <4 x i32> %tmp4b, zeroinitializer
+ %tmp6 = select <4 x i1> %tmp5, <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> %tmp4
+ %tmp7 = bitcast i32* %tmp2 to <4 x i32>*
+ store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4
+ %tmp8 = add i64 %tmp, 4
+ %tmp9 = icmp eq i64 %tmp8, 1024
+ br i1 %tmp9, label %bb10, label %bb1
+
+bb10: ; preds = %bb1
+ ret void
+}
+
+define void @bcast_unfold_ptestnm_v4i32(i32* %arg) {
+; CHECK-LABEL: bcast_unfold_ptestnm_v4i32:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB128_1: # %bb1
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm1
+; CHECK-NEXT: vptestnmd %xmm0, %xmm1, %k1
+; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1 {%k1}
+; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax)
+; CHECK-NEXT: addq $16, %rax
+; CHECK-NEXT: jne .LBB128_1
+; CHECK-NEXT: # %bb.2: # %bb10
+; CHECK-NEXT: retq
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ]
+ %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp
+ %tmp3 = bitcast i32* %tmp2 to <4 x i32>*
+ %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4
+ %tmp4b = and <4 x i32> %tmp4, <i32 2, i32 2, i32 2, i32 2>
+ %tmp5 = icmp eq <4 x i32> %tmp4b, zeroinitializer
+ %tmp6 = select <4 x i1> %tmp5, <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> %tmp4
+ %tmp7 = bitcast i32* %tmp2 to <4 x i32>*
+ store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4
+ %tmp8 = add i64 %tmp, 4
+ %tmp9 = icmp eq i64 %tmp8, 1024
+ br i1 %tmp9, label %bb10, label %bb1
+
+bb10: ; preds = %bb1
+ ret void
+}
+
+define void @bcast_unfold_ptestm_v4i64(i64* %arg) {
+; CHECK-LABEL: bcast_unfold_ptestm_v4i64:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
+; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2]
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB129_1: # %bb1
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm1
+; CHECK-NEXT: vptestmq %ymm0, %ymm1, %k1
+; CHECK-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1 {%k1}
+; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax)
+; CHECK-NEXT: addq $32, %rax
+; CHECK-NEXT: jne .LBB129_1
+; CHECK-NEXT: # %bb.2: # %bb10
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ]
+ %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp
+ %tmp3 = bitcast i64* %tmp2 to <4 x i64>*
+ %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8
+ %tmp4b = and <4 x i64> %tmp4, <i64 2, i64 2, i64 2, i64 2>
+ %tmp5 = icmp ne <4 x i64> %tmp4b, zeroinitializer
+ %tmp6 = select <4 x i1> %tmp5, <4 x i64> <i64 3, i64 3, i64 3, i64 3>, <4 x i64> %tmp4
+ %tmp7 = bitcast i64* %tmp2 to <4 x i64>*
+ store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8
+ %tmp8 = add i64 %tmp, 4
+ %tmp9 = icmp eq i64 %tmp8, 1024
+ br i1 %tmp9, label %bb10, label %bb1
+
+bb10: ; preds = %bb1
+ ret void
+}
+
+define void @bcast_unfold_ptestnm_v4i64(i64* %arg) {
+; CHECK-LABEL: bcast_unfold_ptestnm_v4i64:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
+; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2]
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB130_1: # %bb1
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm1
+; CHECK-NEXT: vptestnmq %ymm0, %ymm1, %k1
+; CHECK-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1 {%k1}
+; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax)
+; CHECK-NEXT: addq $32, %rax
+; CHECK-NEXT: jne .LBB130_1
+; CHECK-NEXT: # %bb.2: # %bb10
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ]
+ %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp
+ %tmp3 = bitcast i64* %tmp2 to <4 x i64>*
+ %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8
+ %tmp4b = and <4 x i64> %tmp4, <i64 2, i64 2, i64 2, i64 2>
+ %tmp5 = icmp eq <4 x i64> %tmp4b, zeroinitializer
+ %tmp6 = select <4 x i1> %tmp5, <4 x i64> <i64 3, i64 3, i64 3, i64 3>, <4 x i64> %tmp4
+ %tmp7 = bitcast i64* %tmp2 to <4 x i64>*
+ store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8
+ %tmp8 = add i64 %tmp, 4
+ %tmp9 = icmp eq i64 %tmp8, 1024
+ br i1 %tmp9, label %bb10, label %bb1
+
+bb10: ; preds = %bb1
+ ret void
+}
More information about the llvm-commits mailing list