[llvm] b34d649 - [X86] ReplaceNodeResults - adjust assert to allow XOP or GFNI subtargets to split i64 BITREVERSE nodes on 32-bit targets
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 24 06:41:55 PDT 2024
Author: Simon Pilgrim
Date: 2024-10-24T06:39:07-07:00
New Revision: b34d64921b2f878b6e1ac7205fc4b13d54a7d8db
URL: https://github.com/llvm/llvm-project/commit/b34d64921b2f878b6e1ac7205fc4b13d54a7d8db
DIFF: https://github.com/llvm/llvm-project/commit/b34d64921b2f878b6e1ac7205fc4b13d54a7d8db.diff
LOG: [X86] ReplaceNodeResults - adjust assert to allow XOP or GFNI subtargets to split i64 BITREVERSE nodes on 32-bit targets
Fixes #113353
Fixes #113034
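Both issues reduce to an i64 bitreverse on a 32-bit GFNI target. A minimal
reproducer sketch (reconstructed from the new i686 RUN line in the test
below, not taken verbatim from the issue reports):

  ; llc -mtriple=i686-unknown -mattr=+avx512bw,+avx512vl,+gfni
  define i64 @reduced(i64 %a) {
    %b = call i64 @llvm.bitreverse.i64(i64 %a)
    ret i64 %b
  }

Since i64 is illegal on 32-bit targets, the BITREVERSE node reaches
ReplaceNodeResults to be split into two i32 pieces, where the old assert
only expected XOP subtargets and so fired on GFNI-only subtargets.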
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/bitreverse.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7f4dc12a20837f..a6d77873ec2901 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34158,9 +34158,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
case ISD::BITREVERSE: {
assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
- assert(Subtarget.hasXOP() && "Expected XOP");
- // We can use VPPERM by copying to a vector register and back. We'll need
- // to move the scalar in two i32 pieces.
+ assert((Subtarget.hasXOP() || Subtarget.hasGFNI()) && "Expected XOP/GFNI");
+ // We can use VPPERM/GF2P8AFFINEQB by copying to a vector register and back.
+ // We'll need to move the scalar in two i32 pieces.
Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG));
return;
}
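For reference, GF2P8AFFINEQB applies an 8x8 bit matrix to every byte of the
source. The sketch below models the instruction's per-byte semantics (an
illustrative standalone C++ model based on the ISA definition, not code from
this patch) to show why the [1,2,4,8,16,32,64,128] matrix broadcast in the
tests below reverses the bits within each byte; a following BSWAP then fixes
the byte order of the wider scalar:

  #include <cstdint>
  #include <cstdio>

  // Destination bit I of each byte is the parity of (matrix byte [7-I] AND
  // source byte); the instruction's imm8 (0 in this patch's lowering) would
  // XOR in a constant term. __builtin_parity is a GCC/Clang builtin.
  static uint8_t AffineByte(uint64_t Matrix, uint8_t Src) {
    uint8_t Out = 0;
    for (int I = 0; I != 8; ++I) {
      uint8_t Row = uint8_t(Matrix >> (8 * (7 - I))); // matrix byte [7-I]
      Out |= uint8_t(__builtin_parity(Row & Src) << I);
    }
    return Out;
  }

  int main() {
    // Bytes {1,2,4,8,16,32,64,128} as a little-endian qword: row J = 1 << J,
    // so destination bit I picks up source bit 7-I -- a per-byte reversal.
    const uint64_t BitRev = 0x8040201008040201ULL;
    std::printf("%02x %02x\n", AffineByte(BitRev, 0x01),
                AffineByte(BitRev, 0x0f)); // prints "80 f0"
  }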
diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll
index e256b811ee8391..d92e1a1e7b9d49 100644
--- a/llvm/test/CodeGen/X86/bitreverse.ll
+++ b/llvm/test/CodeGen/X86/bitreverse.ll
@@ -2,7 +2,8 @@
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+xop | FileCheck %s --check-prefixes=CHECK,X86XOP
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=CHECK,GFNI
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=CHECK,GFNI,X86GFNI
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=CHECK,GFNI,X64GFNI
; These tests just check that the plumbing is in place for @llvm.bitreverse. The
; actual output is massive at the moment as llvm.bitreverse is not yet legal.
@@ -86,11 +87,17 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: retl
;
-; GFNI-LABEL: test_bitreverse_v2i16:
-; GFNI: # %bb.0:
-; GFNI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
-; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT: retq
+; X86GFNI-LABEL: test_bitreverse_v2i16:
+; X86GFNI: # %bb.0:
+; X86GFNI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT: retl
+;
+; X64GFNI-LABEL: test_bitreverse_v2i16:
+; X64GFNI: # %bb.0:
+; X64GFNI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT: retq
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
ret <2 x i16> %b
}
@@ -170,13 +177,23 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
; X86XOP-NEXT: vpextrd $1, %xmm0, %edx
; X86XOP-NEXT: retl
;
-; GFNI-LABEL: test_bitreverse_i64:
-; GFNI: # %bb.0:
-; GFNI-NEXT: vmovq %rdi, %xmm0
-; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT: vmovq %xmm0, %rax
-; GFNI-NEXT: bswapq %rax
-; GFNI-NEXT: retq
+; X86GFNI-LABEL: test_bitreverse_i64:
+; X86GFNI: # %bb.0:
+; X86GFNI-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT: vpextrd $1, %xmm0, %eax
+; X86GFNI-NEXT: bswapl %eax
+; X86GFNI-NEXT: vmovd %xmm0, %edx
+; X86GFNI-NEXT: bswapl %edx
+; X86GFNI-NEXT: retl
+;
+; X64GFNI-LABEL: test_bitreverse_i64:
+; X64GFNI: # %bb.0:
+; X64GFNI-NEXT: vmovq %rdi, %xmm0
+; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT: vmovq %xmm0, %rax
+; X64GFNI-NEXT: bswapq %rax
+; X64GFNI-NEXT: retq
%b = call i64 @llvm.bitreverse.i64(i64 %a)
ret i64 %b
}
@@ -235,13 +252,21 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
; X86XOP-NEXT: vmovd %xmm0, %eax
; X86XOP-NEXT: retl
;
-; GFNI-LABEL: test_bitreverse_i32:
-; GFNI: # %bb.0:
-; GFNI-NEXT: vmovd %edi, %xmm0
-; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT: vmovd %xmm0, %eax
-; GFNI-NEXT: bswapl %eax
-; GFNI-NEXT: retq
+; X86GFNI-LABEL: test_bitreverse_i32:
+; X86GFNI: # %bb.0:
+; X86GFNI-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT: vmovd %xmm0, %eax
+; X86GFNI-NEXT: bswapl %eax
+; X86GFNI-NEXT: retl
+;
+; X64GFNI-LABEL: test_bitreverse_i32:
+; X64GFNI: # %bb.0:
+; X64GFNI-NEXT: vmovd %edi, %xmm0
+; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT: vmovd %xmm0, %eax
+; X64GFNI-NEXT: bswapl %eax
+; X64GFNI-NEXT: retq
%b = call i32 @llvm.bitreverse.i32(i32 %a)
ret i32 %b
}
@@ -303,14 +328,23 @@ define i24 @test_bitreverse_i24(i24 %a) nounwind {
; X86XOP-NEXT: shrl $8, %eax
; X86XOP-NEXT: retl
;
-; GFNI-LABEL: test_bitreverse_i24:
-; GFNI: # %bb.0:
-; GFNI-NEXT: vmovd %edi, %xmm0
-; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT: vmovd %xmm0, %eax
-; GFNI-NEXT: bswapl %eax
-; GFNI-NEXT: shrl $8, %eax
-; GFNI-NEXT: retq
+; X86GFNI-LABEL: test_bitreverse_i24:
+; X86GFNI: # %bb.0:
+; X86GFNI-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT: vmovd %xmm0, %eax
+; X86GFNI-NEXT: bswapl %eax
+; X86GFNI-NEXT: shrl $8, %eax
+; X86GFNI-NEXT: retl
+;
+; X64GFNI-LABEL: test_bitreverse_i24:
+; X64GFNI: # %bb.0:
+; X64GFNI-NEXT: vmovd %edi, %xmm0
+; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT: vmovd %xmm0, %eax
+; X64GFNI-NEXT: bswapl %eax
+; X64GFNI-NEXT: shrl $8, %eax
+; X64GFNI-NEXT: retq
%b = call i24 @llvm.bitreverse.i24(i24 %a)
ret i24 %b
}
@@ -372,14 +406,23 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
; X86XOP-NEXT: # kill: def $ax killed $ax killed $eax
; X86XOP-NEXT: retl
;
-; GFNI-LABEL: test_bitreverse_i16:
-; GFNI: # %bb.0:
-; GFNI-NEXT: vmovd %edi, %xmm0
-; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT: vmovd %xmm0, %eax
-; GFNI-NEXT: rolw $8, %ax
-; GFNI-NEXT: # kill: def $ax killed $ax killed $eax
-; GFNI-NEXT: retq
+; X86GFNI-LABEL: test_bitreverse_i16:
+; X86GFNI: # %bb.0:
+; X86GFNI-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT: vmovd %xmm0, %eax
+; X86GFNI-NEXT: rolw $8, %ax
+; X86GFNI-NEXT: # kill: def $ax killed $ax killed $eax
+; X86GFNI-NEXT: retl
+;
+; X64GFNI-LABEL: test_bitreverse_i16:
+; X64GFNI: # %bb.0:
+; X64GFNI-NEXT: vmovd %edi, %xmm0
+; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT: vmovd %xmm0, %eax
+; X64GFNI-NEXT: rolw $8, %ax
+; X64GFNI-NEXT: # kill: def $ax killed $ax killed $eax
+; X64GFNI-NEXT: retq
%b = call i16 @llvm.bitreverse.i16(i16 %a)
ret i16 %b
}
@@ -430,13 +473,21 @@ define i8 @test_bitreverse_i8(i8 %a) {
; X86XOP-NEXT: # kill: def $al killed $al killed $eax
; X86XOP-NEXT: retl
;
-; GFNI-LABEL: test_bitreverse_i8:
-; GFNI: # %bb.0:
-; GFNI-NEXT: vmovd %edi, %xmm0
-; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT: vmovd %xmm0, %eax
-; GFNI-NEXT: # kill: def $al killed $al killed $eax
-; GFNI-NEXT: retq
+; X86GFNI-LABEL: test_bitreverse_i8:
+; X86GFNI: # %bb.0:
+; X86GFNI-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT: vmovd %xmm0, %eax
+; X86GFNI-NEXT: # kill: def $al killed $al killed $eax
+; X86GFNI-NEXT: retl
+;
+; X64GFNI-LABEL: test_bitreverse_i8:
+; X64GFNI: # %bb.0:
+; X64GFNI-NEXT: vmovd %edi, %xmm0
+; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT: vmovd %xmm0, %eax
+; X64GFNI-NEXT: # kill: def $al killed $al killed $eax
+; X64GFNI-NEXT: retq
%b = call i8 @llvm.bitreverse.i8(i8 %a)
ret i8 %b
}
@@ -489,14 +540,23 @@ define i4 @test_bitreverse_i4(i4 %a) {
; X86XOP-NEXT: # kill: def $al killed $al killed $eax
; X86XOP-NEXT: retl
;
-; GFNI-LABEL: test_bitreverse_i4:
-; GFNI: # %bb.0:
-; GFNI-NEXT: vmovd %edi, %xmm0
-; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; GFNI-NEXT: vmovd %xmm0, %eax
-; GFNI-NEXT: shrb $4, %al
-; GFNI-NEXT: # kill: def $al killed $al killed $eax
-; GFNI-NEXT: retq
+; X86GFNI-LABEL: test_bitreverse_i4:
+; X86GFNI: # %bb.0:
+; X86GFNI-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
+; X86GFNI-NEXT: vmovd %xmm0, %eax
+; X86GFNI-NEXT: shrb $4, %al
+; X86GFNI-NEXT: # kill: def $al killed $al killed $eax
+; X86GFNI-NEXT: retl
+;
+; X64GFNI-LABEL: test_bitreverse_i4:
+; X64GFNI: # %bb.0:
+; X64GFNI-NEXT: vmovd %edi, %xmm0
+; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; X64GFNI-NEXT: vmovd %xmm0, %eax
+; X64GFNI-NEXT: shrb $4, %al
+; X64GFNI-NEXT: # kill: def $al killed $al killed $eax
+; X64GFNI-NEXT: retq
%b = call i4 @llvm.bitreverse.i4(i4 %a)
ret i4 %b
}
@@ -523,7 +583,7 @@ define <2 x i16> @fold_v2i16() {
; GFNI-LABEL: fold_v2i16:
; GFNI: # %bb.0:
; GFNI-NEXT: vmovss {{.*#+}} xmm0 = [61440,240,0,0,0,0,0,0]
-; GFNI-NEXT: retq
+; GFNI-NEXT: ret{{[l|q]}}
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> <i16 15, i16 3840>)
ret <2 x i16> %b
}
@@ -574,11 +634,16 @@ define i8 @identity_i8(i8 %a) {
; X86XOP-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86XOP-NEXT: retl
;
-; GFNI-LABEL: identity_i8:
-; GFNI: # %bb.0:
-; GFNI-NEXT: movl %edi, %eax
-; GFNI-NEXT: # kill: def $al killed $al killed $eax
-; GFNI-NEXT: retq
+; X86GFNI-LABEL: identity_i8:
+; X86GFNI: # %bb.0:
+; X86GFNI-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86GFNI-NEXT: retl
+;
+; X64GFNI-LABEL: identity_i8:
+; X64GFNI: # %bb.0:
+; X64GFNI-NEXT: movl %edi, %eax
+; X64GFNI-NEXT: # kill: def $al killed $al killed $eax
+; X64GFNI-NEXT: retq
%b = call i8 @llvm.bitreverse.i8(i8 %a)
%c = call i8 @llvm.bitreverse.i8(i8 %b)
ret i8 %c
@@ -601,7 +666,7 @@ define <2 x i16> @identity_v2i16(<2 x i16> %a) {
;
; GFNI-LABEL: identity_v2i16:
; GFNI: # %bb.0:
-; GFNI-NEXT: retq
+; GFNI-NEXT: ret{{[l|q]}}
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
%c = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %b)
ret <2 x i16> %c
@@ -1335,70 +1400,194 @@ define i528 @large_promotion(i528 %A) nounwind {
; X86XOP-NEXT: popl %ebp
; X86XOP-NEXT: retl $4
;
-; GFNI-LABEL: large_promotion:
-; GFNI: # %bb.0:
-; GFNI-NEXT: pushq %r14
-; GFNI-NEXT: pushq %rbx
-; GFNI-NEXT: movq %rdi, %rax
-; GFNI-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
-; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT: vmovq %xmm1, %r10
-; GFNI-NEXT: bswapq %r10
-; GFNI-NEXT: vmovq %r9, %xmm1
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT: vmovq %xmm1, %rdi
-; GFNI-NEXT: bswapq %rdi
-; GFNI-NEXT: vmovq %r8, %xmm1
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT: vmovq %xmm1, %r8
-; GFNI-NEXT: bswapq %r8
-; GFNI-NEXT: movq %r8, %r9
-; GFNI-NEXT: shldq $16, %rdi, %r9
-; GFNI-NEXT: shldq $16, %r10, %rdi
-; GFNI-NEXT: vmovq %rcx, %xmm1
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT: vmovq %xmm1, %rcx
-; GFNI-NEXT: bswapq %rcx
-; GFNI-NEXT: shrdq $48, %rcx, %r8
-; GFNI-NEXT: vmovq %rdx, %xmm1
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT: vmovq %xmm1, %rdx
-; GFNI-NEXT: bswapq %rdx
-; GFNI-NEXT: shrdq $48, %rdx, %rcx
-; GFNI-NEXT: vmovq %rsi, %xmm1
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT: vmovq %xmm1, %rsi
-; GFNI-NEXT: bswapq %rsi
-; GFNI-NEXT: shrdq $48, %rsi, %rdx
-; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT: vmovq %xmm1, %r11
-; GFNI-NEXT: bswapq %r11
-; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
-; GFNI-NEXT: vmovq %xmm1, %rbx
-; GFNI-NEXT: bswapq %rbx
-; GFNI-NEXT: shrdq $48, %rbx, %r11
-; GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm0
-; GFNI-NEXT: vmovq %xmm0, %r14
-; GFNI-NEXT: bswapq %r14
-; GFNI-NEXT: shrdq $48, %r14, %rbx
-; GFNI-NEXT: shrdq $48, %r10, %r14
-; GFNI-NEXT: shrq $48, %rsi
-; GFNI-NEXT: movq %r14, 16(%rax)
-; GFNI-NEXT: movq %rbx, 8(%rax)
-; GFNI-NEXT: movq %r11, (%rax)
-; GFNI-NEXT: movq %rdx, 56(%rax)
-; GFNI-NEXT: movq %rcx, 48(%rax)
-; GFNI-NEXT: movq %r8, 40(%rax)
-; GFNI-NEXT: movq %r9, 32(%rax)
-; GFNI-NEXT: movq %rdi, 24(%rax)
-; GFNI-NEXT: movw %si, 64(%rax)
-; GFNI-NEXT: popq %rbx
-; GFNI-NEXT: popq %r14
-; GFNI-NEXT: retq
+; X86GFNI-LABEL: large_promotion:
+; X86GFNI: # %bb.0:
+; X86GFNI-NEXT: pushl %ebp
+; X86GFNI-NEXT: pushl %ebx
+; X86GFNI-NEXT: pushl %edi
+; X86GFNI-NEXT: pushl %esi
+; X86GFNI-NEXT: subl $44, %esp
+; X86GFNI-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; X86GFNI-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT: vmovd %xmm1, %eax
+; X86GFNI-NEXT: bswapl %eax
+; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT: vpextrd $1, %xmm1, %ecx
+; X86GFNI-NEXT: bswapl %ecx
+; X86GFNI-NEXT: shrdl $16, %ecx, %eax
+; X86GFNI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT: vmovd %xmm1, %eax
+; X86GFNI-NEXT: bswapl %eax
+; X86GFNI-NEXT: shrdl $16, %eax, %ecx
+; X86GFNI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT: vpextrd $1, %xmm1, %ecx
+; X86GFNI-NEXT: bswapl %ecx
+; X86GFNI-NEXT: shrdl $16, %ecx, %eax
+; X86GFNI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT: vmovd %xmm1, %eax
+; X86GFNI-NEXT: bswapl %eax
+; X86GFNI-NEXT: shrdl $16, %eax, %ecx
+; X86GFNI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT: vpextrd $1, %xmm1, %ecx
+; X86GFNI-NEXT: bswapl %ecx
+; X86GFNI-NEXT: shrdl $16, %ecx, %eax
+; X86GFNI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT: vmovd %xmm1, %eax
+; X86GFNI-NEXT: bswapl %eax
+; X86GFNI-NEXT: shrdl $16, %eax, %ecx
+; X86GFNI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT: vpextrd $1, %xmm1, %ecx
+; X86GFNI-NEXT: bswapl %ecx
+; X86GFNI-NEXT: shrdl $16, %ecx, %eax
+; X86GFNI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT: vmovd %xmm1, %eax
+; X86GFNI-NEXT: bswapl %eax
+; X86GFNI-NEXT: shrdl $16, %eax, %ecx
+; X86GFNI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT: vpextrd $1, %xmm1, %ecx
+; X86GFNI-NEXT: bswapl %ecx
+; X86GFNI-NEXT: shrdl $16, %ecx, %eax
+; X86GFNI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT: vmovd %xmm1, %eax
+; X86GFNI-NEXT: bswapl %eax
+; X86GFNI-NEXT: shrdl $16, %eax, %ecx
+; X86GFNI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT: vpextrd $1, %xmm1, %ebp
+; X86GFNI-NEXT: bswapl %ebp
+; X86GFNI-NEXT: shrdl $16, %ebp, %eax
+; X86GFNI-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86GFNI-NEXT: vmovd %xmm1, %ebx
+; X86GFNI-NEXT: bswapl %ebx
+; X86GFNI-NEXT: shrdl $16, %ebx, %ebp
+; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X86GFNI-NEXT: vpextrd $1, %xmm1, %edi
+; X86GFNI-NEXT: bswapl %edi
+; X86GFNI-NEXT: shrdl $16, %edi, %ebx
+; X86GFNI-NEXT: vmovd %xmm1, %edx
+; X86GFNI-NEXT: bswapl %edx
+; X86GFNI-NEXT: shrdl $16, %edx, %edi
+; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm0
+; X86GFNI-NEXT: vpextrd $1, %xmm0, %ecx
+; X86GFNI-NEXT: bswapl %ecx
+; X86GFNI-NEXT: shrdl $16, %ecx, %edx
+; X86GFNI-NEXT: vmovd %xmm0, %esi
+; X86GFNI-NEXT: bswapl %esi
+; X86GFNI-NEXT: shrdl $16, %esi, %ecx
+; X86GFNI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86GFNI-NEXT: movl %ecx, 60(%eax)
+; X86GFNI-NEXT: movl %edx, 56(%eax)
+; X86GFNI-NEXT: movl %edi, 52(%eax)
+; X86GFNI-NEXT: movl %ebx, 48(%eax)
+; X86GFNI-NEXT: movl %ebp, 44(%eax)
+; X86GFNI-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86GFNI-NEXT: movl %ecx, 40(%eax)
+; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT: movl %ecx, 36(%eax)
+; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT: movl %ecx, 32(%eax)
+; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT: movl %ecx, 28(%eax)
+; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT: movl %ecx, 24(%eax)
+; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT: movl %ecx, 20(%eax)
+; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT: movl %ecx, 16(%eax)
+; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT: movl %ecx, 12(%eax)
+; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT: movl %ecx, 8(%eax)
+; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT: movl %ecx, 4(%eax)
+; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86GFNI-NEXT: movl %ecx, (%eax)
+; X86GFNI-NEXT: shrl $16, %esi
+; X86GFNI-NEXT: movw %si, 64(%eax)
+; X86GFNI-NEXT: addl $44, %esp
+; X86GFNI-NEXT: popl %esi
+; X86GFNI-NEXT: popl %edi
+; X86GFNI-NEXT: popl %ebx
+; X86GFNI-NEXT: popl %ebp
+; X86GFNI-NEXT: retl $4
+;
+; X64GFNI-LABEL: large_promotion:
+; X64GFNI: # %bb.0:
+; X64GFNI-NEXT: pushq %r14
+; X64GFNI-NEXT: pushq %rbx
+; X64GFNI-NEXT: movq %rdi, %rax
+; X64GFNI-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; X64GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT: vmovq %xmm1, %r10
+; X64GFNI-NEXT: bswapq %r10
+; X64GFNI-NEXT: vmovq %r9, %xmm1
+; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT: vmovq %xmm1, %rdi
+; X64GFNI-NEXT: bswapq %rdi
+; X64GFNI-NEXT: vmovq %r8, %xmm1
+; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT: vmovq %xmm1, %r8
+; X64GFNI-NEXT: bswapq %r8
+; X64GFNI-NEXT: movq %r8, %r9
+; X64GFNI-NEXT: shldq $16, %rdi, %r9
+; X64GFNI-NEXT: shldq $16, %r10, %rdi
+; X64GFNI-NEXT: vmovq %rcx, %xmm1
+; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT: vmovq %xmm1, %rcx
+; X64GFNI-NEXT: bswapq %rcx
+; X64GFNI-NEXT: shrdq $48, %rcx, %r8
+; X64GFNI-NEXT: vmovq %rdx, %xmm1
+; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT: vmovq %xmm1, %rdx
+; X64GFNI-NEXT: bswapq %rdx
+; X64GFNI-NEXT: shrdq $48, %rdx, %rcx
+; X64GFNI-NEXT: vmovq %rsi, %xmm1
+; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT: vmovq %xmm1, %rsi
+; X64GFNI-NEXT: bswapq %rsi
+; X64GFNI-NEXT: shrdq $48, %rsi, %rdx
+; X64GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT: vmovq %xmm1, %r11
+; X64GFNI-NEXT: bswapq %r11
+; X64GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1
+; X64GFNI-NEXT: vmovq %xmm1, %rbx
+; X64GFNI-NEXT: bswapq %rbx
+; X64GFNI-NEXT: shrdq $48, %rbx, %r11
+; X64GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm0
+; X64GFNI-NEXT: vmovq %xmm0, %r14
+; X64GFNI-NEXT: bswapq %r14
+; X64GFNI-NEXT: shrdq $48, %r14, %rbx
+; X64GFNI-NEXT: shrdq $48, %r10, %r14
+; X64GFNI-NEXT: shrq $48, %rsi
+; X64GFNI-NEXT: movq %r14, 16(%rax)
+; X64GFNI-NEXT: movq %rbx, 8(%rax)
+; X64GFNI-NEXT: movq %r11, (%rax)
+; X64GFNI-NEXT: movq %rdx, 56(%rax)
+; X64GFNI-NEXT: movq %rcx, 48(%rax)
+; X64GFNI-NEXT: movq %r8, 40(%rax)
+; X64GFNI-NEXT: movq %r9, 32(%rax)
+; X64GFNI-NEXT: movq %rdi, 24(%rax)
+; X64GFNI-NEXT: movw %si, 64(%rax)
+; X64GFNI-NEXT: popq %rbx
+; X64GFNI-NEXT: popq %r14
+; X64GFNI-NEXT: retq
%Z = call i528 @llvm.bitreverse.i528(i528 %A)
ret i528 %Z
}
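The updated coverage can be exercised with the standard lit invocation from a
built tree (the path assumes a build directory named "build"):

  build/bin/llvm-lit llvm/test/CodeGen/X86/bitreverse.ll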