[PATCH] D143860: [X86] Add additional operations that masked instructions can combine with

Noah Goldstein via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 14 23:17:01 PST 2023


goldstein.w.n updated this revision to Diff 497563.
goldstein.w.n added a comment.

Rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143860/new/

https://reviews.llvm.org/D143860

Files:
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/test/CodeGen/X86/vselect-avx512.ll


Index: llvm/test/CodeGen/X86/vselect-avx512.ll
===================================================================
--- llvm/test/CodeGen/X86/vselect-avx512.ll
+++ llvm/test/CodeGen/X86/vselect-avx512.ll
@@ -17,24 +17,21 @@
 ; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm2[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
 ; CHECK-NEXT:    vpminsd %zmm2, %zmm0, %zmm1
 ; CHECK-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
-; CHECK-NEXT:    movb $-86, %al
-; CHECK-NEXT:    kmovw %eax, %k2
-; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k2}
-; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm1[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; CHECK-NEXT:    vpminsd %zmm1, %zmm0, %zmm2
-; CHECK-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm2[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-NEXT:    vshufps {{.*#+}} zmm2 = zmm1[0,1],zmm0[2,3],zmm1[4,5],zmm0[6,7],zmm1[8,9],zmm0[10,11],zmm1[12,13],zmm0[14,15]
+; CHECK-NEXT:    vshufps {{.*#+}} zmm0 = zmm1[1,0],zmm0[3,2],zmm1[5,4],zmm0[7,6],zmm1[9,8],zmm0[11,10],zmm1[13,12],zmm0[15,14]
+; CHECK-NEXT:    vpminsd %zmm2, %zmm0, %zmm1
+; CHECK-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm1[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
 ; CHECK-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
+; CHECK-NEXT:    vpminsd %zmm1, %zmm0, %zmm2
+; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm2 {%k2}
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm2[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
 ; CHECK-NEXT:    vpminsd %zmm2, %zmm0, %zmm1
 ; CHECK-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
-; CHECK-NEXT:    movb $-52, %al
-; CHECK-NEXT:    kmovw %eax, %k3
-; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k3}
-; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
-; CHECK-NEXT:    vpminsd %zmm1, %zmm0, %zmm2
-; CHECK-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm2 {%k2}
-; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm2[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-NEXT:    vshufps {{.*#+}} zmm2 = zmm1[0,1],zmm0[2,3],zmm1[4,5],zmm0[6,7],zmm1[8,9],zmm0[10,11],zmm1[12,13],zmm0[14,15]
+; CHECK-NEXT:    vshufps {{.*#+}} zmm0 = zmm1[1,0],zmm0[3,2],zmm1[5,4],zmm0[7,6],zmm1[9,8],zmm0[11,10],zmm1[13,12],zmm0[15,14]
 ; CHECK-NEXT:    vpminsd %zmm2, %zmm0, %zmm1
 ; CHECK-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm1[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
@@ -44,16 +41,15 @@
 ; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm1 = zmm2[2,3,0,1],zmm0[6,7,4,5]
 ; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[4,5,6,7]
 ; CHECK-NEXT:    vpminsd %zmm0, %zmm1, %zmm2
-; CHECK-NEXT:    vpmaxsd %zmm0, %zmm1, %zmm0
-; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm2 {%k3}
+; CHECK-NEXT:    vpmaxsd %zmm0, %zmm1, %zmm2 {%k2}
 ; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm2[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
 ; CHECK-NEXT:    vpminsd %zmm2, %zmm0, %zmm1
 ; CHECK-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
-; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k2}
-; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm1[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; CHECK-NEXT:    vpminsd %zmm1, %zmm0, %zmm2
-; CHECK-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT:    vmovdqu64 %zmm2, (%rdi)
+; CHECK-NEXT:    vshufps {{.*#+}} zmm2 = zmm1[0,1],zmm0[2,3],zmm1[4,5],zmm0[6,7],zmm1[8,9],zmm0[10,11],zmm1[12,13],zmm0[14,15]
+; CHECK-NEXT:    vshufps {{.*#+}} zmm0 = zmm1[1,0],zmm0[3,2],zmm1[5,4],zmm0[7,6],zmm1[9,8],zmm0[11,10],zmm1[13,12],zmm0[15,14]
+; CHECK-NEXT:    vpminsd %zmm2, %zmm0, %zmm1
+; CHECK-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqu64 %zmm1, (%rdi)
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %2 = load <16 x i32>, ptr %0, align 1
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19924,6 +19924,17 @@
     case ISD::SUB:
     case ISD::AND:
     case ISD::XOR:
+    case ISD::OR:
+    case ISD::SMAX:
+    case ISD::SMIN:
+    case ISD::UMAX:
+    case ISD::UMIN:
+    case ISD::ABS:
+    case ISD::SHL:
+    case ISD::SRL:
+    case ISD::SRA:
+    case ISD::MUL:
+    case ISD::SETCC:
       break;
     }
     if (!V->hasOneUse())


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D143860.497563.patch
Type: text/x-patch
Size: 4370 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230215/9d33e933/attachment.bin>


More information about the llvm-commits mailing list