[llvm] 170c525 - [X86] combineExtractVectorElt - fold extract(trunc(x),c) -> trunc(extract(x,c))

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 8 03:01:44 PDT 2024


Author: Simon Pilgrim
Date: 2024-04-08T11:01:19+01:00
New Revision: 170c525d79a4ab3659041b0655ac9697768fc915

URL: https://github.com/llvm/llvm-project/commit/170c525d79a4ab3659041b0655ac9697768fc915
DIFF: https://github.com/llvm/llvm-project/commit/170c525d79a4ab3659041b0655ac9697768fc915.diff

LOG: [X86] combineExtractVectorElt - fold extract(trunc(x),c) -> trunc(extract(x,c))

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/test/CodeGen/X86/insertelement-var-index.ll
    llvm/test/CodeGen/X86/movmsk-cmp.ll
    llvm/test/CodeGen/X86/pr63439.ll
    llvm/test/CodeGen/X86/pr64439.ll
    llvm/test/CodeGen/X86/vec_cast.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6f65344215c020..f24e0fc25faccc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44710,6 +44710,17 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // Attempt to fold extract(trunc(x),c) -> trunc(extract(x,c)).
+  if (CIdx && InputVector.getOpcode() == ISD::TRUNCATE) {
+    SDValue TruncSrc = InputVector.getOperand(0);
+    EVT TruncSVT = TruncSrc.getValueType().getScalarType();
+    if (DCI.isBeforeLegalize() && TLI.isTypeLegal(TruncSVT)) {
+      SDValue NewExt =
+          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TruncSVT, TruncSrc, EltIdx);
+      return DAG.getAnyExtOrTrunc(NewExt, dl, VT);
+    }
+  }
+
   return SDValue();
 }
 

diff  --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 3e40bfa1e791d0..2a77d0238721c0 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -1050,11 +1050,9 @@ define zeroext i8 @test_extractelement_v32i1(<32 x i8> %a, <32 x i8> %b) nounwin
 ; KNL:       ## %bb.0:
 ; KNL-NEXT:    vpminub %xmm1, %xmm0, %xmm1
 ; KNL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
-; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT:    kshiftrw $2, %k0, %k0
-; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    vpextrb $2, %xmm0, %eax
+; KNL-NEXT:    notb %al
+; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    andl $1, %eax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
@@ -1081,11 +1079,9 @@ define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) nounwin
 ; KNL-NEXT:    vpminub %ymm1, %ymm0, %ymm1
 ; KNL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
-; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT:    kshiftrw $15, %k0, %k0
-; KNL-NEXT:    kmovw %k0, %ecx
+; KNL-NEXT:    vpextrb $15, %xmm0, %eax
+; KNL-NEXT:    notb %al
+; KNL-NEXT:    movzbl %al, %ecx
 ; KNL-NEXT:    andl $1, %ecx
 ; KNL-NEXT:    movl $4, %eax
 ; KNL-NEXT:    subl %ecx, %eax
@@ -1116,15 +1112,10 @@ define zeroext i8 @extractelement_v64i1_alt(<64 x i8> %a, <64 x i8> %b) nounwind
 ; KNL-NEXT:    vpminub %ymm1, %ymm0, %ymm1
 ; KNL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
-; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT:    kshiftrw $15, %k0, %k0
-; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    andb $1, %al
-; KNL-NEXT:    movb $4, %cl
-; KNL-NEXT:    subb %al, %cl
-; KNL-NEXT:    movzbl %cl, %eax
+; KNL-NEXT:    vpextrb $15, %xmm0, %eax
+; KNL-NEXT:    notb %al
+; KNL-NEXT:    addb $4, %al
+; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;

diff  --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll
index 5420e6b5ce86f3..16946caf9a328f 100644
--- a/llvm/test/CodeGen/X86/insertelement-var-index.ll
+++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll
@@ -2294,13 +2294,13 @@ define i32 @PR44139(ptr %p) {
 ;
 ; AVX1-LABEL: PR44139:
 ; AVX1:       # %bb.0:
+; AVX1-NEXT:    movq (%rdi), %rax
 ; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
-; AVX1-NEXT:    vpinsrq $1, (%rdi), %xmm0, %xmm1
+; AVX1-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
 ; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX1-NEXT:    vmovaps %ymm0, 64(%rdi)
 ; AVX1-NEXT:    vmovaps %ymm0, 96(%rdi)
 ; AVX1-NEXT:    vmovaps %ymm0, 32(%rdi)
-; AVX1-NEXT:    movl (%rdi), %eax
 ; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
 ; AVX1-NEXT:    leal 2147483647(%rax), %ecx
 ; AVX1-NEXT:    testl %eax, %eax
@@ -2315,13 +2315,13 @@ define i32 @PR44139(ptr %p) {
 ;
 ; AVX2-LABEL: PR44139:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    movq (%rdi), %rax
 ; AVX2-NEXT:    vpbroadcastq (%rdi), %ymm0
-; AVX2-NEXT:    vpinsrq $1, (%rdi), %xmm0, %xmm1
+; AVX2-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX2-NEXT:    vmovdqa %ymm0, 64(%rdi)
 ; AVX2-NEXT:    vmovdqa %ymm0, 96(%rdi)
 ; AVX2-NEXT:    vmovdqa %ymm0, 32(%rdi)
-; AVX2-NEXT:    movl (%rdi), %eax
 ; AVX2-NEXT:    vmovdqa %ymm1, (%rdi)
 ; AVX2-NEXT:    leal 2147483647(%rax), %ecx
 ; AVX2-NEXT:    testl %eax, %eax
@@ -2336,14 +2336,12 @@ define i32 @PR44139(ptr %p) {
 ;
 ; AVX512-LABEL: PR44139:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
-; AVX512-NEXT:    vpbroadcastq (%rdi), %zmm1
-; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
-; AVX512-NEXT:    vpinsrq $1, (%rdi), %xmm1, %xmm2
-; AVX512-NEXT:    vinserti32x4 $0, %xmm2, %zmm1, %zmm2
-; AVX512-NEXT:    vmovdqa64 %zmm1, 64(%rdi)
-; AVX512-NEXT:    vmovdqa64 %zmm2, (%rdi)
-; AVX512-NEXT:    vmovd %xmm0, %eax
+; AVX512-NEXT:    movq (%rdi), %rax
+; AVX512-NEXT:    vpbroadcastq (%rdi), %zmm0
+; AVX512-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
+; AVX512-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm1
+; AVX512-NEXT:    vmovdqa64 %zmm0, 64(%rdi)
+; AVX512-NEXT:    vmovdqa64 %zmm1, (%rdi)
 ; AVX512-NEXT:    leal 2147483647(%rax), %ecx
 ; AVX512-NEXT:    testl %eax, %eax
 ; AVX512-NEXT:    cmovnsl %eax, %ecx

diff  --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index e8b3121ecfb523..253f990f8735ee 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -3682,18 +3682,12 @@ define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; KNL-LABEL: movmsk_v16i8:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT:    kshiftrw $15, %k0, %k1
-; KNL-NEXT:    kmovw %k1, %ecx
-; KNL-NEXT:    kshiftrw $8, %k0, %k1
-; KNL-NEXT:    kmovw %k1, %edx
-; KNL-NEXT:    kshiftrw $3, %k0, %k0
-; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    xorb %dl, %al
-; KNL-NEXT:    andb %cl, %al
+; KNL-NEXT:    vpextrb $15, %xmm0, %ecx
+; KNL-NEXT:    vpextrb $8, %xmm0, %edx
+; KNL-NEXT:    vpextrb $3, %xmm0, %eax
+; KNL-NEXT:    xorl %edx, %eax
+; KNL-NEXT:    andl %ecx, %eax
 ; KNL-NEXT:    # kill: def $al killed $al killed $eax
-; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: movmsk_v16i8:

diff  --git a/llvm/test/CodeGen/X86/pr63439.ll b/llvm/test/CodeGen/X86/pr63439.ll
index 155da0c629127a..7018940faa81fe 100644
--- a/llvm/test/CodeGen/X86/pr63439.ll
+++ b/llvm/test/CodeGen/X86/pr63439.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s
 
 define i16 @mulhs(i16 %a0, i16 %a1) {
 ; CHECK-LABEL: mulhs:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movswl %si, %ecx
-; CHECK-NEXT:    movswl %di, %eax
+; CHECK-NEXT:    movswl %di, %ecx
+; CHECK-NEXT:    movswl %si, %eax
 ; CHECK-NEXT:    imull %ecx, %eax
 ; CHECK-NEXT:    shrl $16, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -23,23 +23,14 @@ define i16 @mulhs(i16 %a0, i16 %a1) {
 }
 
 define i16 @mulhu(i16 %a0, i16 %a1) {
-; SSE-LABEL: mulhu:
-; SSE:       # %bb.0:
-; SSE-NEXT:    movzwl %si, %ecx
-; SSE-NEXT:    movzwl %di, %eax
-; SSE-NEXT:    imull %ecx, %eax
-; SSE-NEXT:    shrl $16, %eax
-; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
-; SSE-NEXT:    retq
-;
-; AVX-LABEL: mulhu:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovd %edi, %xmm0
-; AVX-NEXT:    vmovd %esi, %xmm1
-; AVX-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX-NEXT:    retq
+; CHECK-LABEL: mulhu:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movzwl %di, %ecx
+; CHECK-NEXT:    movzwl %si, %eax
+; CHECK-NEXT:    imull %ecx, %eax
+; CHECK-NEXT:    shrl $16, %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq
     %x0 = zext i16 %a0 to i32
     %x1 = zext i16 %a1 to i32
     %v0 = insertelement <1 x i32> <i32 undef>, i32 %x0, i32 0

diff  --git a/llvm/test/CodeGen/X86/pr64439.ll b/llvm/test/CodeGen/X86/pr64439.ll
index 7aa52fc49a9fcf..6e3d007dd78c90 100644
--- a/llvm/test/CodeGen/X86/pr64439.ll
+++ b/llvm/test/CodeGen/X86/pr64439.ll
@@ -4,10 +4,9 @@
 define void @f(ptr %0, <32 x i1> %1, i32 %2) nounwind {
 ; CHECK-LABEL: f:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
-; CHECK-NEXT:    vpmovb2m %ymm0, %k0
-; CHECK-NEXT:    kshiftrd $3, %k0, %k1
-; CHECK-NEXT:    kmovd %k1, %eax
+; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm1
+; CHECK-NEXT:    vpmovb2m %ymm1, %k0
+; CHECK-NEXT:    vpextrb $3, %xmm0, %eax
 ; CHECK-NEXT:    vpbroadcastb %esi, %ymm0
 ; CHECK-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
 ; CHECK-NEXT:    vpmovm2b %k0, %ymm0

diff  --git a/llvm/test/CodeGen/X86/vec_cast.ll b/llvm/test/CodeGen/X86/vec_cast.ll
index 0a6bc2f59b685b..e0089354cc9530 100644
--- a/llvm/test/CodeGen/X86/vec_cast.ll
+++ b/llvm/test/CodeGen/X86/vec_cast.ll
@@ -156,7 +156,7 @@ define <3 x i16> @h(<3 x i32> %a) nounwind {
 ; CHECK-WIN-LABEL: h:
 ; CHECK-WIN:       # %bb.0:
 ; CHECK-WIN-NEXT:    movdqa (%rcx), %xmm0
-; CHECK-WIN-NEXT:    movd %xmm0, %eax
+; CHECK-WIN-NEXT:    movl (%rcx), %eax
 ; CHECK-WIN-NEXT:    pextrw $2, %xmm0, %edx
 ; CHECK-WIN-NEXT:    pextrw $4, %xmm0, %ecx
 ; CHECK-WIN-NEXT:    # kill: def $ax killed $ax killed $eax


        


More information about the llvm-commits mailing list