[llvm] c06a61f - [X86] narrowShuffle - only narrow from legal vector types

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri May 12 08:41:35 PDT 2023


Author: Simon Pilgrim
Date: 2023-05-12T16:41:20+01:00
New Revision: c06a61f78eeba8e36fa845845f90bc17eb19672a

URL: https://github.com/llvm/llvm-project/commit/c06a61f78eeba8e36fa845845f90bc17eb19672a
DIFF: https://github.com/llvm/llvm-project/commit/c06a61f78eeba8e36fa845845f90bc17eb19672a.diff

LOG: [X86] narrowShuffle - only narrow from legal vector types

Fixes #62653

Added: 
    llvm/test/CodeGen/X86/pr62653.ll

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/phaddsub-extract.ll

Removed: 
    (none)


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1325ee636102..013f444ad23d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42788,9 +42788,9 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
 /// low half of each source vector and does not set any high half elements in
 /// the destination vector, narrow the shuffle to half its original size.
 static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) {
-  if (!Shuf->getValueType(0).isSimple())
+  EVT VT = Shuf->getValueType(0);
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(Shuf->getValueType(0)))
     return SDValue();
-  MVT VT = Shuf->getSimpleValueType(0);
   if (!VT.is256BitVector() && !VT.is512BitVector())
     return SDValue();
 
@@ -42814,7 +42814,7 @@ static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) {
   // the wide shuffle that we started with.
   return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0),
                                Shuf->getOperand(1), HalfMask, HalfIdx1,
-                               HalfIdx2, false, DAG, /*UseConcat*/true);
+                               HalfIdx2, false, DAG, /*UseConcat*/ true);
 }
 
 static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,

diff  --git a/llvm/test/CodeGen/X86/phaddsub-extract.ll b/llvm/test/CodeGen/X86/phaddsub-extract.ll
index 6a1156f341c6..b38a10c7e426 100644
--- a/llvm/test/CodeGen/X86/phaddsub-extract.ll
+++ b/llvm/test/CodeGen/X86/phaddsub-extract.ll
@@ -1840,8 +1840,7 @@ define i32 @partial_reduction_sub_v16i32(<16 x i32> %x) {
 ; AVX2-FAST:       # %bb.0:
 ; AVX2-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; AVX2-FAST-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; AVX2-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; AVX2-FAST-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT:    vphsubd %xmm0, %xmm0, %xmm0
 ; AVX2-FAST-NEXT:    vmovd %xmm0, %eax
 ; AVX2-FAST-NEXT:    vzeroupper
 ; AVX2-FAST-NEXT:    retq

diff  --git a/llvm/test/CodeGen/X86/pr62653.ll b/llvm/test/CodeGen/X86/pr62653.ll
new file mode 100644
index 000000000000..0a03c1831f65
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr62653.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
+
+define <64 x i4> @pr62653(<64 x i4> %a0) nounwind {
+; CHECK-LABEL: pr62653:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $r9d killed $r9d def $r9
+; CHECK-NEXT:    # kill: def $r8d killed $r8d def $r8
+; CHECK-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
+; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-NEXT:    andl $15, %edi
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NEXT:    andl $15, %r10d
+; CHECK-NEXT:    shlq $4, %r10
+; CHECK-NEXT:    orq %rdi, %r10
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-NEXT:    andl $15, %edi
+; CHECK-NEXT:    shlq $8, %rdi
+; CHECK-NEXT:    orq %r10, %rdi
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NEXT:    andl $15, %r10d
+; CHECK-NEXT:    shlq $12, %r10
+; CHECK-NEXT:    orq %rdi, %r10
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NEXT:    andl $15, %r11d
+; CHECK-NEXT:    shlq $16, %r11
+; CHECK-NEXT:    orq %r10, %r11
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-NEXT:    andl $15, %edi
+; CHECK-NEXT:    shlq $20, %rdi
+; CHECK-NEXT:    orq %r11, %rdi
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NEXT:    andl $15, %r10d
+; CHECK-NEXT:    shlq $24, %r10
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NEXT:    andl $15, %r11d
+; CHECK-NEXT:    shlq $28, %r11
+; CHECK-NEXT:    orq %r10, %r11
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NEXT:    andl $15, %r10d
+; CHECK-NEXT:    shlq $32, %r10
+; CHECK-NEXT:    orq %r11, %r10
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NEXT:    andl $15, %r11d
+; CHECK-NEXT:    shlq $36, %r11
+; CHECK-NEXT:    orq %r10, %r11
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NEXT:    andl $15, %r10d
+; CHECK-NEXT:    shlq $40, %r10
+; CHECK-NEXT:    orq %r11, %r10
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NEXT:    andl $15, %r11d
+; CHECK-NEXT:    shlq $44, %r11
+; CHECK-NEXT:    orq %r10, %r11
+; CHECK-NEXT:    orq %rdi, %r11
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-NEXT:    andl $15, %edi
+; CHECK-NEXT:    shlq $48, %rdi
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NEXT:    andl $15, %r10d
+; CHECK-NEXT:    shlq $52, %r10
+; CHECK-NEXT:    orq %rdi, %r10
+; CHECK-NEXT:    orq %r11, %r10
+; CHECK-NEXT:    movq %r10, 8(%rax)
+; CHECK-NEXT:    andl $15, %esi
+; CHECK-NEXT:    andl $15, %edx
+; CHECK-NEXT:    shlq $4, %rdx
+; CHECK-NEXT:    orq %rsi, %rdx
+; CHECK-NEXT:    andl $15, %ecx
+; CHECK-NEXT:    shlq $8, %rcx
+; CHECK-NEXT:    orq %rdx, %rcx
+; CHECK-NEXT:    andl $15, %r8d
+; CHECK-NEXT:    shlq $12, %r8
+; CHECK-NEXT:    orq %rcx, %r8
+; CHECK-NEXT:    andl $15, %r9d
+; CHECK-NEXT:    shlq $16, %r9
+; CHECK-NEXT:    orq %r8, %r9
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT:    andl $15, %ecx
+; CHECK-NEXT:    shlq $20, %rcx
+; CHECK-NEXT:    orq %r9, %rcx
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
+; CHECK-NEXT:    andl $15, %esi
+; CHECK-NEXT:    shlq $24, %rsi
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
+; CHECK-NEXT:    andl $15, %edx
+; CHECK-NEXT:    shlq $28, %rdx
+; CHECK-NEXT:    orq %rsi, %rdx
+; CHECK-NEXT:    orq %rcx, %rdx
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT:    andl $15, %ecx
+; CHECK-NEXT:    shlq $32, %rcx
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
+; CHECK-NEXT:    andl $15, %esi
+; CHECK-NEXT:    shlq $36, %rsi
+; CHECK-NEXT:    orq %rcx, %rsi
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT:    andl $15, %ecx
+; CHECK-NEXT:    shlq $40, %rcx
+; CHECK-NEXT:    orq %rsi, %rcx
+; CHECK-NEXT:    orq %rdx, %rcx
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
+; CHECK-NEXT:    andl $15, %edx
+; CHECK-NEXT:    shlq $44, %rdx
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
+; CHECK-NEXT:    andl $15, %esi
+; CHECK-NEXT:    shlq $48, %rsi
+; CHECK-NEXT:    orq %rdx, %rsi
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
+; CHECK-NEXT:    andl $15, %edx
+; CHECK-NEXT:    shlq $52, %rdx
+; CHECK-NEXT:    orq %rsi, %rdx
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
+; CHECK-NEXT:    andl $15, %esi
+; CHECK-NEXT:    shlq $56, %rsi
+; CHECK-NEXT:    orq %rdx, %rsi
+; CHECK-NEXT:    orq %rcx, %rsi
+; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT:    shlq $60, %rcx
+; CHECK-NEXT:    orq %rsi, %rcx
+; CHECK-NEXT:    movq %rcx, (%rax)
+; CHECK-NEXT:    retq
+  %res = shufflevector <64 x i4> %a0, <64 x i4> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 64, i32 65, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  ret <64 x i4> %res
+}


        


More information about the llvm-commits mailing list