[PATCH] D143860: [X86] Add additional operations that masked instructions can combine with
Noah Goldstein via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 14 23:17:01 PST 2023
goldstein.w.n updated this revision to Diff 497563.
goldstein.w.n added a comment.
Rebase
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D143860/new/
https://reviews.llvm.org/D143860
Files:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vselect-avx512.ll
Index: llvm/test/CodeGen/X86/vselect-avx512.ll
===================================================================
--- llvm/test/CodeGen/X86/vselect-avx512.ll
+++ llvm/test/CodeGen/X86/vselect-avx512.ll
@@ -17,24 +17,21 @@
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm2[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-NEXT: vpminsd %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
-; CHECK-NEXT: movb $-86, %al
-; CHECK-NEXT: kmovw %eax, %k2
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2}
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm1[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2
-; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm2[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm1[0,1],zmm0[2,3],zmm1[4,5],zmm0[6,7],zmm1[8,9],zmm0[10,11],zmm1[12,13],zmm0[14,15]
+; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm1[1,0],zmm0[3,2],zmm1[5,4],zmm0[7,6],zmm1[9,8],zmm0[11,10],zmm1[13,12],zmm0[15,14]
+; CHECK-NEXT: vpminsd %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm1[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
+; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2
+; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
+; CHECK-NEXT: kmovw %eax, %k2
+; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k2}
+; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm2[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
; CHECK-NEXT: vpminsd %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
-; CHECK-NEXT: movb $-52, %al
-; CHECK-NEXT: kmovw %eax, %k3
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k3}
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
-; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2
-; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k2}
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm2[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm1[0,1],zmm0[2,3],zmm1[4,5],zmm0[6,7],zmm1[8,9],zmm0[10,11],zmm1[12,13],zmm0[14,15]
+; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm1[1,0],zmm0[3,2],zmm1[5,4],zmm0[7,6],zmm1[9,8],zmm0[11,10],zmm1[13,12],zmm0[15,14]
; CHECK-NEXT: vpminsd %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm1[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
@@ -44,16 +41,15 @@
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[2,3,0,1],zmm0[6,7,4,5]
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[4,5,6,7]
; CHECK-NEXT: vpminsd %zmm0, %zmm1, %zmm2
-; CHECK-NEXT: vpmaxsd %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k3}
+; CHECK-NEXT: vpmaxsd %zmm0, %zmm1, %zmm2 {%k2}
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm2[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
; CHECK-NEXT: vpminsd %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k2}
-; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = zmm1[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2
-; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovdqu64 %zmm2, (%rdi)
+; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm1[0,1],zmm0[2,3],zmm1[4,5],zmm0[6,7],zmm1[8,9],zmm0[10,11],zmm1[12,13],zmm0[14,15]
+; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm1[1,0],zmm0[3,2],zmm1[5,4],zmm0[7,6],zmm1[9,8],zmm0[11,10],zmm1[13,12],zmm0[15,14]
+; CHECK-NEXT: vpminsd %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqu64 %zmm1, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%2 = load <16 x i32>, ptr %0, align 1
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19924,6 +19924,17 @@
case ISD::SUB:
case ISD::AND:
case ISD::XOR:
+ case ISD::OR:
+ case ISD::SMAX:
+ case ISD::SMIN:
+ case ISD::UMAX:
+ case ISD::UMIN:
+ case ISD::ABS:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::MUL:
+ case ISD::SETCC:
break;
}
if (!V->hasOneUse())
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D143860.497563.patch
Type: text/x-patch
Size: 4370 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230215/9d33e933/attachment.bin>
More information about the llvm-commits mailing list