[llvm] b5fc2a4 - Add additional operations that masked instructions can combine with
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 26 10:12:03 PST 2023
Author: Noah Goldstein
Date: 2023-02-26T12:11:16-06:00
New Revision: b5fc2a474ebabb082f52f43cea2d5b299f28bd70
URL: https://github.com/llvm/llvm-project/commit/b5fc2a474ebabb082f52f43cea2d5b299f28bd70
DIFF: https://github.com/llvm/llvm-project/commit/b5fc2a474ebabb082f52f43cea2d5b299f28bd70.diff
LOG: Add additional operations that masked instructions can combine with
Newly accepted ISD opcodes: OR, SMAX, SMIN, UMAX, UMIN, ABS, SHL, SRL, SRA, MUL.
TLI.isBinOp is intentionally not used as a generic check here: doing so
causes regressions, because many binops cannot be combined with masked
instructions.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D143860
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-mask-with-shuffle.ll
llvm/test/CodeGen/X86/vselect-avx512.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7ff686cc8a9e..abbc5ebb7108 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19855,6 +19855,16 @@ static bool canCombineAsMaskOperation(SDValue V1, SDValue V2,
case ISD::SUB:
case ISD::AND:
case ISD::XOR:
+ case ISD::OR:
+ case ISD::SMAX:
+ case ISD::SMIN:
+ case ISD::UMAX:
+ case ISD::UMIN:
+ case ISD::ABS:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::MUL:
break;
}
if (!V->hasOneUse())
diff --git a/llvm/test/CodeGen/X86/combine-mask-with-shuffle.ll b/llvm/test/CodeGen/X86/combine-mask-with-shuffle.ll
index e105d4e3f0cc..268ac3dd31b8 100644
--- a/llvm/test/CodeGen/X86/combine-mask-with-shuffle.ll
+++ b/llvm/test/CodeGen/X86/combine-mask-with-shuffle.ll
@@ -10,10 +10,9 @@ define <16 x i32> @combine_mask_with_or(<16 x i32> %v0) {
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpord %zmm2, %zmm1, %zmm1
-; CHECK-NEXT: vpopcntd %zmm0, %zmm0
-; CHECK-NEXT: movb $-52, %al
+; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -32,10 +31,9 @@ define <16 x i32> @combine_mask_with_mul(<16 x i32> %v0) {
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpmulld %zmm2, %zmm1, %zmm1
-; CHECK-NEXT: vpopcntd %zmm0, %zmm0
-; CHECK-NEXT: movb $-52, %al
+; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -51,10 +49,9 @@ define <16 x i32> @combine_mask_with_abs(<16 x i32> %v0) {
; CHECK: # %bb.0:
; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT: vpabsd %zmm1, %zmm1
-; CHECK-NEXT: vpopcntd %zmm0, %zmm0
-; CHECK-NEXT: movb $-52, %al
+; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -75,10 +72,9 @@ define <16 x i32> @combine_mask_with_umin(<16 x i32> %v0) {
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpminud %zmm2, %zmm1, %zmm1
-; CHECK-NEXT: vpopcntd %zmm0, %zmm0
-; CHECK-NEXT: movb $-52, %al
+; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -97,10 +93,9 @@ define <16 x i32> @combine_mask_with_umax(<16 x i32> %v0) {
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpmaxud %zmm2, %zmm1, %zmm1
-; CHECK-NEXT: vpopcntd %zmm0, %zmm0
-; CHECK-NEXT: movb $-52, %al
+; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -119,10 +114,9 @@ define <16 x i32> @combine_mask_with_smin(<16 x i32> %v0) {
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpminsd %zmm2, %zmm1, %zmm1
-; CHECK-NEXT: vpopcntd %zmm0, %zmm0
-; CHECK-NEXT: movb $-52, %al
+; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -141,10 +135,9 @@ define <16 x i32> @combine_mask_with_smax(<16 x i32> %v0) {
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
-; CHECK-NEXT: vpopcntd %zmm0, %zmm0
-; CHECK-NEXT: movb $-52, %al
+; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -163,10 +156,9 @@ define <16 x i32> @combine_mask_with_shl(<16 x i32> %v0) {
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpsllvd %zmm2, %zmm1, %zmm1
-; CHECK-NEXT: vpopcntd %zmm0, %zmm0
-; CHECK-NEXT: movb $-52, %al
+; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -185,10 +177,9 @@ define <16 x i32> @combine_mask_with_ashr(<16 x i32> %v0) {
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpsravd %zmm2, %zmm1, %zmm1
-; CHECK-NEXT: vpopcntd %zmm0, %zmm0
-; CHECK-NEXT: movb $-52, %al
+; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -207,10 +198,9 @@ define <16 x i32> @combine_mask_with_lshr(<16 x i32> %v0) {
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpsrlvd %zmm2, %zmm1, %zmm1
-; CHECK-NEXT: vpopcntd %zmm0, %zmm0
-; CHECK-NEXT: movb $-52, %al
+; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
diff --git a/llvm/test/CodeGen/X86/vselect-avx512.ll b/llvm/test/CodeGen/X86/vselect-avx512.ll
index c75590d07c0e..cd59562b2125 100644
--- a/llvm/test/CodeGen/X86/vselect-avx512.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx512.ll
@@ -17,24 +17,21 @@ define void @PR46249(ptr noalias nocapture noundef %0) {
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm2[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-NEXT: vpminsd %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
-; CHECK-NEXT: movb $-86, %al
-; CHECK-NEXT: kmovw %eax, %k2
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2}
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm1[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2
-; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm2[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm1[0,1],zmm0[2,3],zmm1[4,5],zmm0[6,7],zmm1[8,9],zmm0[10,11],zmm1[12,13],zmm0[14,15]
+; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm1[1,0],zmm0[3,2],zmm1[5,4],zmm0[7,6],zmm1[9,8],zmm0[11,10],zmm1[13,12],zmm0[15,14]
+; CHECK-NEXT: vpminsd %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm1[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
+; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2
+; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
+; CHECK-NEXT: kmovw %eax, %k2
+; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k2}
+; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm2[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
; CHECK-NEXT: vpminsd %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
-; CHECK-NEXT: movb $-52, %al
-; CHECK-NEXT: kmovw %eax, %k3
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k3}
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
-; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2
-; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k2}
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm2[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm1[0,1],zmm0[2,3],zmm1[4,5],zmm0[6,7],zmm1[8,9],zmm0[10,11],zmm1[12,13],zmm0[14,15]
+; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm1[1,0],zmm0[3,2],zmm1[5,4],zmm0[7,6],zmm1[9,8],zmm0[11,10],zmm1[13,12],zmm0[15,14]
; CHECK-NEXT: vpminsd %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm1[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
@@ -44,16 +41,15 @@ define void @PR46249(ptr noalias nocapture noundef %0) {
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[2,3,0,1],zmm0[6,7,4,5]
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[4,5,6,7]
; CHECK-NEXT: vpminsd %zmm0, %zmm1, %zmm2
-; CHECK-NEXT: vpmaxsd %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k3}
+; CHECK-NEXT: vpmaxsd %zmm0, %zmm1, %zmm2 {%k2}
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm2[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
; CHECK-NEXT: vpminsd %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2}
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm1[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2
-; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovdqu64 %zmm2, (%rdi)
+; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm1[0,1],zmm0[2,3],zmm1[4,5],zmm0[6,7],zmm1[8,9],zmm0[10,11],zmm1[12,13],zmm0[14,15]
+; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm1[1,0],zmm0[3,2],zmm1[5,4],zmm0[7,6],zmm1[9,8],zmm0[11,10],zmm1[13,12],zmm0[15,14]
+; CHECK-NEXT: vpminsd %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqu64 %zmm1, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%2 = load <16 x i32>, ptr %0, align 1
More information about the llvm-commits mailing list