[llvm] b34d649 - [X86] ReplaceNodeResults - adjust assert to allow XOP or GFNI subtargets to split i64 BITREVERSE nodes on 32-bit targets

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 24 06:41:55 PDT 2024


Author: Simon Pilgrim
Date: 2024-10-24T06:39:07-07:00
New Revision: b34d64921b2f878b6e1ac7205fc4b13d54a7d8db

URL: https://github.com/llvm/llvm-project/commit/b34d64921b2f878b6e1ac7205fc4b13d54a7d8db
DIFF: https://github.com/llvm/llvm-project/commit/b34d64921b2f878b6e1ac7205fc4b13d54a7d8db.diff

LOG: [X86] ReplaceNodeResults - adjust assert to allow XOP or GFNI subtargets to split i64 BITREVERSE nodes on 32-bit targets

Fixes #113353
Fixes #113034
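
For reference, a minimal reproducer, distilled from the new i686 RUN line
and the test_bitreverse_i64 case in the test diff below (declare added for
standalone use); before this change it tripped the XOP-only assert when
GFNI was the feature enabling custom i64 BITREVERSE handling on a 32-bit
target:

  ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512bw,+avx512vl,+gfni
  declare i64 @llvm.bitreverse.i64(i64)

  define i64 @test_bitreverse_i64(i64 %a) nounwind {
    %b = call i64 @llvm.bitreverse.i64(i64 %a)
    ret i64 %b
  }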

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/bitreverse.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7f4dc12a20837f..a6d77873ec2901 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34158,9 +34158,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
   }
   case ISD::BITREVERSE: {
     assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
-    assert(Subtarget.hasXOP() && "Expected XOP");
-    // We can use VPPERM by copying to a vector register and back. We'll need
-    // to move the scalar in two i32 pieces.
+    assert((Subtarget.hasXOP() || Subtarget.hasGFNI()) && "Expected XOP/GFNI");
+    // We can use VPPERM/GF2P8AFFINEQB by copying to a vector register and back.
+    // We'll need to move the scalar in two i32 pieces.
     Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG));
     return;
   }

diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll
index e256b811ee8391..d92e1a1e7b9d49 100644
--- a/llvm/test/CodeGen/X86/bitreverse.ll
+++ b/llvm/test/CodeGen/X86/bitreverse.ll
@@ -2,7 +2,8 @@
 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=CHECK,X86
 ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,X64
 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+xop | FileCheck %s --check-prefixes=CHECK,X86XOP
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=CHECK,GFNI
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=CHECK,GFNI,X86GFNI
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=CHECK,GFNI,X64GFNI
 
 ; These tests just check that the plumbing is in place for @llvm.bitreverse. The
 ; actual output is massive at the moment as llvm.bitreverse is not yet legal.
@@ -86,11 +87,17 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
 ; X86XOP-NEXT:    vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
 ; X86XOP-NEXT:    retl
 ;
-; GFNI-LABEL: test_bitreverse_v2i16:
-; GFNI:       # %bb.0:
-; GFNI-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT:    retq
+; X86GFNI-LABEL: test_bitreverse_v2i16:
+; X86GFNI:       # %bb.0:
+; X86GFNI-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; X86GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT:    retl
+;
+; X64GFNI-LABEL: test_bitreverse_v2i16:
+; X64GFNI:       # %bb.0:
+; X64GFNI-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; X64GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT:    retq
   %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
   ret <2 x i16> %b
 }
@@ -170,13 +177,23 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
 ; X86XOP-NEXT:    vpextrd $1, %xmm0, %edx
 ; X86XOP-NEXT:    retl
 ;
-; GFNI-LABEL: test_bitreverse_i64:
-; GFNI:       # %bb.0:
-; GFNI-NEXT:    vmovq %rdi, %xmm0
-; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT:    vmovq %xmm0, %rax
-; GFNI-NEXT:    bswapq %rax
-; GFNI-NEXT:    retq
+; X86GFNI-LABEL: test_bitreverse_i64:
+; X86GFNI:       # %bb.0:
+; X86GFNI-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT:    vpextrd $1, %xmm0, %eax
+; X86GFNI-NEXT:    bswapl %eax
+; X86GFNI-NEXT:    vmovd %xmm0, %edx
+; X86GFNI-NEXT:    bswapl %edx
+; X86GFNI-NEXT:    retl
+;
+; X64GFNI-LABEL: test_bitreverse_i64:
+; X64GFNI:       # %bb.0:
+; X64GFNI-NEXT:    vmovq %rdi, %xmm0
+; X64GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT:    vmovq %xmm0, %rax
+; X64GFNI-NEXT:    bswapq %rax
+; X64GFNI-NEXT:    retq
   %b = call i64 @llvm.bitreverse.i64(i64 %a)
   ret i64 %b
 }
@@ -235,13 +252,21 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
 ; X86XOP-NEXT:    vmovd %xmm0, %eax
 ; X86XOP-NEXT:    retl
 ;
-; GFNI-LABEL: test_bitreverse_i32:
-; GFNI:       # %bb.0:
-; GFNI-NEXT:    vmovd %edi, %xmm0
-; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT:    vmovd %xmm0, %eax
-; GFNI-NEXT:    bswapl %eax
-; GFNI-NEXT:    retq
+; X86GFNI-LABEL: test_bitreverse_i32:
+; X86GFNI:       # %bb.0:
+; X86GFNI-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT:    vmovd %xmm0, %eax
+; X86GFNI-NEXT:    bswapl %eax
+; X86GFNI-NEXT:    retl
+;
+; X64GFNI-LABEL: test_bitreverse_i32:
+; X64GFNI:       # %bb.0:
+; X64GFNI-NEXT:    vmovd %edi, %xmm0
+; X64GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT:    vmovd %xmm0, %eax
+; X64GFNI-NEXT:    bswapl %eax
+; X64GFNI-NEXT:    retq
   %b = call i32 @llvm.bitreverse.i32(i32 %a)
   ret i32 %b
 }
@@ -303,14 +328,23 @@ define i24 @test_bitreverse_i24(i24 %a) nounwind {
 ; X86XOP-NEXT:    shrl $8, %eax
 ; X86XOP-NEXT:    retl
 ;
-; GFNI-LABEL: test_bitreverse_i24:
-; GFNI:       # %bb.0:
-; GFNI-NEXT:    vmovd %edi, %xmm0
-; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT:    vmovd %xmm0, %eax
-; GFNI-NEXT:    bswapl %eax
-; GFNI-NEXT:    shrl $8, %eax
-; GFNI-NEXT:    retq
+; X86GFNI-LABEL: test_bitreverse_i24:
+; X86GFNI:       # %bb.0:
+; X86GFNI-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT:    vmovd %xmm0, %eax
+; X86GFNI-NEXT:    bswapl %eax
+; X86GFNI-NEXT:    shrl $8, %eax
+; X86GFNI-NEXT:    retl
+;
+; X64GFNI-LABEL: test_bitreverse_i24:
+; X64GFNI:       # %bb.0:
+; X64GFNI-NEXT:    vmovd %edi, %xmm0
+; X64GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT:    vmovd %xmm0, %eax
+; X64GFNI-NEXT:    bswapl %eax
+; X64GFNI-NEXT:    shrl $8, %eax
+; X64GFNI-NEXT:    retq
   %b = call i24 @llvm.bitreverse.i24(i24 %a)
   ret i24 %b
 }
@@ -372,14 +406,23 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
 ; X86XOP-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86XOP-NEXT:    retl
 ;
-; GFNI-LABEL: test_bitreverse_i16:
-; GFNI:       # %bb.0:
-; GFNI-NEXT:    vmovd %edi, %xmm0
-; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT:    vmovd %xmm0, %eax
-; GFNI-NEXT:    rolw $8, %ax
-; GFNI-NEXT:    # kill: def $ax killed $ax killed $eax
-; GFNI-NEXT:    retq
+; X86GFNI-LABEL: test_bitreverse_i16:
+; X86GFNI:       # %bb.0:
+; X86GFNI-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT:    vmovd %xmm0, %eax
+; X86GFNI-NEXT:    rolw $8, %ax
+; X86GFNI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86GFNI-NEXT:    retl
+;
+; X64GFNI-LABEL: test_bitreverse_i16:
+; X64GFNI:       # %bb.0:
+; X64GFNI-NEXT:    vmovd %edi, %xmm0
+; X64GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT:    vmovd %xmm0, %eax
+; X64GFNI-NEXT:    rolw $8, %ax
+; X64GFNI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64GFNI-NEXT:    retq
   %b = call i16 @llvm.bitreverse.i16(i16 %a)
   ret i16 %b
 }
@@ -430,13 +473,21 @@ define i8 @test_bitreverse_i8(i8 %a) {
 ; X86XOP-NEXT:    # kill: def $al killed $al killed $eax
 ; X86XOP-NEXT:    retl
 ;
-; GFNI-LABEL: test_bitreverse_i8:
-; GFNI:       # %bb.0:
-; GFNI-NEXT:    vmovd %edi, %xmm0
-; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT:    vmovd %xmm0, %eax
-; GFNI-NEXT:    # kill: def $al killed $al killed $eax
-; GFNI-NEXT:    retq
+; X86GFNI-LABEL: test_bitreverse_i8:
+; X86GFNI:       # %bb.0:
+; X86GFNI-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT:    vmovd %xmm0, %eax
+; X86GFNI-NEXT:    # kill: def $al killed $al killed $eax
+; X86GFNI-NEXT:    retl
+;
+; X64GFNI-LABEL: test_bitreverse_i8:
+; X64GFNI:       # %bb.0:
+; X64GFNI-NEXT:    vmovd %edi, %xmm0
+; X64GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT:    vmovd %xmm0, %eax
+; X64GFNI-NEXT:    # kill: def $al killed $al killed $eax
+; X64GFNI-NEXT:    retq
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %b
 }
@@ -489,14 +540,23 @@ define i4 @test_bitreverse_i4(i4 %a) {
 ; X86XOP-NEXT:    # kill: def $al killed $al killed $eax
 ; X86XOP-NEXT:    retl
 ;
-; GFNI-LABEL: test_bitreverse_i4:
-; GFNI:       # %bb.0:
-; GFNI-NEXT:    vmovd %edi, %xmm0
-; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT:    vmovd %xmm0, %eax
-; GFNI-NEXT:    shrb $4, %al
-; GFNI-NEXT:    # kill: def $al killed $al killed $eax
-; GFNI-NEXT:    retq
+; X86GFNI-LABEL: test_bitreverse_i4:
+; X86GFNI:       # %bb.0:
+; X86GFNI-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT:    vmovd %xmm0, %eax
+; X86GFNI-NEXT:    shrb $4, %al
+; X86GFNI-NEXT:    # kill: def $al killed $al killed $eax
+; X86GFNI-NEXT:    retl
+;
+; X64GFNI-LABEL: test_bitreverse_i4:
+; X64GFNI:       # %bb.0:
+; X64GFNI-NEXT:    vmovd %edi, %xmm0
+; X64GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT:    vmovd %xmm0, %eax
+; X64GFNI-NEXT:    shrb $4, %al
+; X64GFNI-NEXT:    # kill: def $al killed $al killed $eax
+; X64GFNI-NEXT:    retq
   %b = call i4 @llvm.bitreverse.i4(i4 %a)
   ret i4 %b
 }
@@ -523,7 +583,7 @@ define <2 x i16> @fold_v2i16() {
 ; GFNI-LABEL: fold_v2i16:
 ; GFNI:       # %bb.0:
 ; GFNI-NEXT:    vmovss {{.*#+}} xmm0 = [61440,240,0,0,0,0,0,0]
-; GFNI-NEXT:    retq
+; GFNI-NEXT:    ret{{[l|q]}}
   %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> <i16 15, i16 3840>)
   ret <2 x i16> %b
 }
@@ -574,11 +634,16 @@ define i8 @identity_i8(i8 %a) {
 ; X86XOP-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86XOP-NEXT:    retl
 ;
-; GFNI-LABEL: identity_i8:
-; GFNI:       # %bb.0:
-; GFNI-NEXT:    movl %edi, %eax
-; GFNI-NEXT:    # kill: def $al killed $al killed $eax
-; GFNI-NEXT:    retq
+; X86GFNI-LABEL: identity_i8:
+; X86GFNI:       # %bb.0:
+; X86GFNI-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86GFNI-NEXT:    retl
+;
+; X64GFNI-LABEL: identity_i8:
+; X64GFNI:       # %bb.0:
+; X64GFNI-NEXT:    movl %edi, %eax
+; X64GFNI-NEXT:    # kill: def $al killed $al killed $eax
+; X64GFNI-NEXT:    retq
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
   %c = call i8 @llvm.bitreverse.i8(i8 %b)
   ret i8 %c
@@ -601,7 +666,7 @@ define <2 x i16> @identity_v2i16(<2 x i16> %a) {
 ;
 ; GFNI-LABEL: identity_v2i16:
 ; GFNI:       # %bb.0:
-; GFNI-NEXT:    retq
+; GFNI-NEXT:    ret{{[l|q]}}
   %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
   %c = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %b)
   ret <2 x i16> %c
@@ -1335,70 +1400,194 @@ define i528 @large_promotion(i528 %A) nounwind {
 ; X86XOP-NEXT:    popl %ebp
 ; X86XOP-NEXT:    retl $4
 ;
-; GFNI-LABEL: large_promotion:
-; GFNI:       # %bb.0:
-; GFNI-NEXT:    pushq %r14
-; GFNI-NEXT:    pushq %rbx
-; GFNI-NEXT:    movq %rdi, %rax
-; GFNI-NEXT:    vpbroadcastq {{.*#+}} xmm0 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
-; GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
-; GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT:    vmovq %xmm1, %r10
-; GFNI-NEXT:    bswapq %r10
-; GFNI-NEXT:    vmovq %r9, %xmm1
-; GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT:    vmovq %xmm1, %rdi
-; GFNI-NEXT:    bswapq %rdi
-; GFNI-NEXT:    vmovq %r8, %xmm1
-; GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT:    vmovq %xmm1, %r8
-; GFNI-NEXT:    bswapq %r8
-; GFNI-NEXT:    movq %r8, %r9
-; GFNI-NEXT:    shldq $16, %rdi, %r9
-; GFNI-NEXT:    shldq $16, %r10, %rdi
-; GFNI-NEXT:    vmovq %rcx, %xmm1
-; GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT:    vmovq %xmm1, %rcx
-; GFNI-NEXT:    bswapq %rcx
-; GFNI-NEXT:    shrdq $48, %rcx, %r8
-; GFNI-NEXT:    vmovq %rdx, %xmm1
-; GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT:    vmovq %xmm1, %rdx
-; GFNI-NEXT:    bswapq %rdx
-; GFNI-NEXT:    shrdq $48, %rdx, %rcx
-; GFNI-NEXT:    vmovq %rsi, %xmm1
-; GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT:    vmovq %xmm1, %rsi
-; GFNI-NEXT:    bswapq %rsi
-; GFNI-NEXT:    shrdq $48, %rsi, %rdx
-; GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
-; GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT:    vmovq %xmm1, %r11
-; GFNI-NEXT:    bswapq %r11
-; GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
-; GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT:    vmovq %xmm1, %rbx
-; GFNI-NEXT:    bswapq %rbx
-; GFNI-NEXT:    shrdq $48, %rbx, %r11
-; GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
-; GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm0
-; GFNI-NEXT:    vmovq %xmm0, %r14
-; GFNI-NEXT:    bswapq %r14
-; GFNI-NEXT:    shrdq $48, %r14, %rbx
-; GFNI-NEXT:    shrdq $48, %r10, %r14
-; GFNI-NEXT:    shrq $48, %rsi
-; GFNI-NEXT:    movq %r14, 16(%rax)
-; GFNI-NEXT:    movq %rbx, 8(%rax)
-; GFNI-NEXT:    movq %r11, (%rax)
-; GFNI-NEXT:    movq %rdx, 56(%rax)
-; GFNI-NEXT:    movq %rcx, 48(%rax)
-; GFNI-NEXT:    movq %r8, 40(%rax)
-; GFNI-NEXT:    movq %r9, 32(%rax)
-; GFNI-NEXT:    movq %rdi, 24(%rax)
-; GFNI-NEXT:    movw %si, 64(%rax)
-; GFNI-NEXT:    popq %rbx
-; GFNI-NEXT:    popq %r14
-; GFNI-NEXT:    retq
+; X86GFNI-LABEL: large_promotion:
+; X86GFNI:       # %bb.0:
+; X86GFNI-NEXT:    pushl %ebp
+; X86GFNI-NEXT:    pushl %ebx
+; X86GFNI-NEXT:    pushl %edi
+; X86GFNI-NEXT:    pushl %esi
+; X86GFNI-NEXT:    subl $44, %esp
+; X86GFNI-NEXT:    vpbroadcastq {{.*#+}} xmm0 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; X86GFNI-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT:    vmovd %xmm1, %eax
+; X86GFNI-NEXT:    bswapl %eax
+; X86GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT:    vpextrd $1, %xmm1, %ecx
+; X86GFNI-NEXT:    bswapl %ecx
+; X86GFNI-NEXT:    shrdl $16, %ecx, %eax
+; X86GFNI-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT:    vmovd %xmm1, %eax
+; X86GFNI-NEXT:    bswapl %eax
+; X86GFNI-NEXT:    shrdl $16, %eax, %ecx
+; X86GFNI-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT:    vpextrd $1, %xmm1, %ecx
+; X86GFNI-NEXT:    bswapl %ecx
+; X86GFNI-NEXT:    shrdl $16, %ecx, %eax
+; X86GFNI-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT:    vmovd %xmm1, %eax
+; X86GFNI-NEXT:    bswapl %eax
+; X86GFNI-NEXT:    shrdl $16, %eax, %ecx
+; X86GFNI-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT:    vpextrd $1, %xmm1, %ecx
+; X86GFNI-NEXT:    bswapl %ecx
+; X86GFNI-NEXT:    shrdl $16, %ecx, %eax
+; X86GFNI-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT:    vmovd %xmm1, %eax
+; X86GFNI-NEXT:    bswapl %eax
+; X86GFNI-NEXT:    shrdl $16, %eax, %ecx
+; X86GFNI-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT:    vpextrd $1, %xmm1, %ecx
+; X86GFNI-NEXT:    bswapl %ecx
+; X86GFNI-NEXT:    shrdl $16, %ecx, %eax
+; X86GFNI-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT:    vmovd %xmm1, %eax
+; X86GFNI-NEXT:    bswapl %eax
+; X86GFNI-NEXT:    shrdl $16, %eax, %ecx
+; X86GFNI-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT:    vpextrd $1, %xmm1, %ecx
+; X86GFNI-NEXT:    bswapl %ecx
+; X86GFNI-NEXT:    shrdl $16, %ecx, %eax
+; X86GFNI-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT:    vmovd %xmm1, %eax
+; X86GFNI-NEXT:    bswapl %eax
+; X86GFNI-NEXT:    shrdl $16, %eax, %ecx
+; X86GFNI-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT:    vpextrd $1, %xmm1, %ebp
+; X86GFNI-NEXT:    bswapl %ebp
+; X86GFNI-NEXT:    shrdl $16, %ebp, %eax
+; X86GFNI-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86GFNI-NEXT:    vmovd %xmm1, %ebx
+; X86GFNI-NEXT:    bswapl %ebx
+; X86GFNI-NEXT:    shrdl $16, %ebx, %ebp
+; X86GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT:    vpextrd $1, %xmm1, %edi
+; X86GFNI-NEXT:    bswapl %edi
+; X86GFNI-NEXT:    shrdl $16, %edi, %ebx
+; X86GFNI-NEXT:    vmovd %xmm1, %edx
+; X86GFNI-NEXT:    bswapl %edx
+; X86GFNI-NEXT:    shrdl $16, %edx, %edi
+; X86GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm0
+; X86GFNI-NEXT:    vpextrd $1, %xmm0, %ecx
+; X86GFNI-NEXT:    bswapl %ecx
+; X86GFNI-NEXT:    shrdl $16, %ecx, %edx
+; X86GFNI-NEXT:    vmovd %xmm0, %esi
+; X86GFNI-NEXT:    bswapl %esi
+; X86GFNI-NEXT:    shrdl $16, %esi, %ecx
+; X86GFNI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86GFNI-NEXT:    movl %ecx, 60(%eax)
+; X86GFNI-NEXT:    movl %edx, 56(%eax)
+; X86GFNI-NEXT:    movl %edi, 52(%eax)
+; X86GFNI-NEXT:    movl %ebx, 48(%eax)
+; X86GFNI-NEXT:    movl %ebp, 44(%eax)
+; X86GFNI-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86GFNI-NEXT:    movl %ecx, 40(%eax)
+; X86GFNI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT:    movl %ecx, 36(%eax)
+; X86GFNI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT:    movl %ecx, 32(%eax)
+; X86GFNI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT:    movl %ecx, 28(%eax)
+; X86GFNI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT:    movl %ecx, 24(%eax)
+; X86GFNI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT:    movl %ecx, 20(%eax)
+; X86GFNI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT:    movl %ecx, 16(%eax)
+; X86GFNI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT:    movl %ecx, 12(%eax)
+; X86GFNI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT:    movl %ecx, 8(%eax)
+; X86GFNI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT:    movl %ecx, 4(%eax)
+; X86GFNI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT:    movl %ecx, (%eax)
+; X86GFNI-NEXT:    shrl $16, %esi
+; X86GFNI-NEXT:    movw %si, 64(%eax)
+; X86GFNI-NEXT:    addl $44, %esp
+; X86GFNI-NEXT:    popl %esi
+; X86GFNI-NEXT:    popl %edi
+; X86GFNI-NEXT:    popl %ebx
+; X86GFNI-NEXT:    popl %ebp
+; X86GFNI-NEXT:    retl $4
+;
+; X64GFNI-LABEL: large_promotion:
+; X64GFNI:       # %bb.0:
+; X64GFNI-NEXT:    pushq %r14
+; X64GFNI-NEXT:    pushq %rbx
+; X64GFNI-NEXT:    movq %rdi, %rax
+; X64GFNI-NEXT:    vpbroadcastq {{.*#+}} xmm0 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; X64GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT:    vmovq %xmm1, %r10
+; X64GFNI-NEXT:    bswapq %r10
+; X64GFNI-NEXT:    vmovq %r9, %xmm1
+; X64GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT:    vmovq %xmm1, %rdi
+; X64GFNI-NEXT:    bswapq %rdi
+; X64GFNI-NEXT:    vmovq %r8, %xmm1
+; X64GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT:    vmovq %xmm1, %r8
+; X64GFNI-NEXT:    bswapq %r8
+; X64GFNI-NEXT:    movq %r8, %r9
+; X64GFNI-NEXT:    shldq $16, %rdi, %r9
+; X64GFNI-NEXT:    shldq $16, %r10, %rdi
+; X64GFNI-NEXT:    vmovq %rcx, %xmm1
+; X64GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT:    vmovq %xmm1, %rcx
+; X64GFNI-NEXT:    bswapq %rcx
+; X64GFNI-NEXT:    shrdq $48, %rcx, %r8
+; X64GFNI-NEXT:    vmovq %rdx, %xmm1
+; X64GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT:    vmovq %xmm1, %rdx
+; X64GFNI-NEXT:    bswapq %rdx
+; X64GFNI-NEXT:    shrdq $48, %rdx, %rcx
+; X64GFNI-NEXT:    vmovq %rsi, %xmm1
+; X64GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT:    vmovq %xmm1, %rsi
+; X64GFNI-NEXT:    bswapq %rsi
+; X64GFNI-NEXT:    shrdq $48, %rsi, %rdx
+; X64GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT:    vmovq %xmm1, %r11
+; X64GFNI-NEXT:    bswapq %r11
+; X64GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT:    vmovq %xmm1, %rbx
+; X64GFNI-NEXT:    bswapq %rbx
+; X64GFNI-NEXT:    shrdq $48, %rbx, %r11
+; X64GFNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64GFNI-NEXT:    vgf2p8affineqb $0, %xmm0, %xmm1, %xmm0
+; X64GFNI-NEXT:    vmovq %xmm0, %r14
+; X64GFNI-NEXT:    bswapq %r14
+; X64GFNI-NEXT:    shrdq $48, %r14, %rbx
+; X64GFNI-NEXT:    shrdq $48, %r10, %r14
+; X64GFNI-NEXT:    shrq $48, %rsi
+; X64GFNI-NEXT:    movq %r14, 16(%rax)
+; X64GFNI-NEXT:    movq %rbx, 8(%rax)
+; X64GFNI-NEXT:    movq %r11, (%rax)
+; X64GFNI-NEXT:    movq %rdx, 56(%rax)
+; X64GFNI-NEXT:    movq %rcx, 48(%rax)
+; X64GFNI-NEXT:    movq %r8, 40(%rax)
+; X64GFNI-NEXT:    movq %r9, 32(%rax)
+; X64GFNI-NEXT:    movq %rdi, 24(%rax)
+; X64GFNI-NEXT:    movw %si, 64(%rax)
+; X64GFNI-NEXT:    popq %rbx
+; X64GFNI-NEXT:    popq %r14
+; X64GFNI-NEXT:    retq
   %Z = call i528 @llvm.bitreverse.i528(i528 %A)
   ret i528 %Z
 }
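
The GFNI lowering in the checks above follows a single pattern:
VGF2P8AFFINEQB with the broadcast byte matrix [1,2,4,8,16,32,64,128]
reverses the bit order within each byte, and a byte swap (BSWAPQ on
x86-64, or per-i32 BSWAPL after the split on 32-bit targets) then
reverses the byte order. As a rough, illustrative C++ sketch of the
64-bit scalar equivalent (not the backend code; assumes a GFNI-capable
x86-64 host, built with e.g. clang -mgfni):

  #include <immintrin.h>
  #include <cstdint>

  uint64_t bitreverse64(uint64_t X) {
    // Rows [1,2,4,8,16,32,64,128] (0x8040201008040201 little-endian):
    // the GF(2) affine matrix that reverses the bits within each byte.
    const __m128i M = _mm_set1_epi64x((long long)0x8040201008040201ULL);
    __m128i V = _mm_cvtsi64_si128((long long)X); // copy scalar into XMM
    V = _mm_gf2p8affine_epi64_epi8(V, M, 0);     // reverse bits per byte
    uint64_t R = (uint64_t)_mm_cvtsi128_si64(V); // copy back to scalar
    return __builtin_bswap64(R);                 // then reverse the bytes
  }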