[llvm-branch-commits] [llvm] ca8de9a - [X86] Fix crash with i64 bitreverse on 32-bit targets with XOP.
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Dec 14 13:42:35 PST 2020
Author: Craig Topper
Date: 2020-12-14T16:41:26-05:00
New Revision: ca8de9ad8895ab1368135f6fc63f29fe92b75c76
URL: https://github.com/llvm/llvm-project/commit/ca8de9ad8895ab1368135f6fc63f29fe92b75c76
DIFF: https://github.com/llvm/llvm-project/commit/ca8de9ad8895ab1368135f6fc63f29fe92b75c76.diff
LOG: [X86] Fix crash with i64 bitreverse on 32-bit targets with XOP.
We unconditionally marked i64 as Custom, but did not install a
handler in ReplaceNodeResults when i64 isn't a legal type. This
leads to ReplaceNodeResults asserting.
We have two options to fix this. One is to mark i64 as Custom only
on 64-bit targets and let it expand to two i32 bitreverses, which
each need a VPPERM. The other is to add the Custom handling to
ReplaceNodeResults. The latter is what I went with.
(cherry picked from commit 57c0c4a27575840ae0a48eb9f8455a5ed087c857)
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/bitreverse.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f5b704ebbe9d..56690c3c555b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30285,6 +30285,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(V);
return;
}
+ case ISD::BITREVERSE:
+ assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+ assert(Subtarget.hasXOP() && "Expected XOP");
+ // We can use VPPERM by copying to a vector register and back. We'll need
+ // to move the scalar in two i32 pieces.
+ Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG));
+ return;
}
}
diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll
index 343d9fb2da2d..8e2f6f9b463b 100644
--- a/llvm/test/CodeGen/X86/bitreverse.ll
+++ b/llvm/test/CodeGen/X86/bitreverse.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+xop | FileCheck %s --check-prefixes=X86XOP
; These tests just check that the plumbing is in place for @llvm.bitreverse. The
; actual output is massive at the moment as llvm.bitreverse is not yet legal.
@@ -75,6 +76,11 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
; X64-NEXT: psrlw $1, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: retq
+;
+; X86XOP-LABEL: test_bitreverse_v2i16:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+; X86XOP-NEXT: retl
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
ret <2 x i16> %b
}
@@ -145,6 +151,14 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
; X64-NEXT: shrq %rdx
; X64-NEXT: leaq (%rdx,%rcx,2), %rax
; X64-NEXT: retq
+;
+; X86XOP-LABEL: test_bitreverse_i64:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+; X86XOP-NEXT: vmovd %xmm0, %eax
+; X86XOP-NEXT: vpextrd $1, %xmm0, %edx
+; X86XOP-NEXT: retl
%b = call i64 @llvm.bitreverse.i64(i64 %a)
ret i64 %b
}
@@ -195,6 +209,13 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
; X64-NEXT: shrl %eax
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: retq
+;
+; X86XOP-LABEL: test_bitreverse_i32:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+; X86XOP-NEXT: vmovd %xmm0, %eax
+; X86XOP-NEXT: retl
%b = call i32 @llvm.bitreverse.i32(i32 %a)
ret i32 %b
}
@@ -247,6 +268,14 @@ define i24 @test_bitreverse_i24(i24 %a) nounwind {
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: shrl $8, %eax
; X64-NEXT: retq
+;
+; X86XOP-LABEL: test_bitreverse_i24:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+; X86XOP-NEXT: vmovd %xmm0, %eax
+; X86XOP-NEXT: shrl $8, %eax
+; X86XOP-NEXT: retl
%b = call i24 @llvm.bitreverse.i24(i24 %a)
ret i24 %b
}
@@ -299,6 +328,14 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; X86XOP-LABEL: test_bitreverse_i16:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+; X86XOP-NEXT: vmovd %xmm0, %eax
+; X86XOP-NEXT: # kill: def $ax killed $ax killed $eax
+; X86XOP-NEXT: retl
%b = call i16 @llvm.bitreverse.i16(i16 %a)
ret i16 %b
}
@@ -342,6 +379,14 @@ define i8 @test_bitreverse_i8(i8 %a) {
; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+;
+; X86XOP-LABEL: test_bitreverse_i8:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+; X86XOP-NEXT: vmovd %xmm0, %eax
+; X86XOP-NEXT: # kill: def $al killed $al killed $eax
+; X86XOP-NEXT: retl
%b = call i8 @llvm.bitreverse.i8(i8 %a)
ret i8 %b
}
@@ -387,6 +432,15 @@ define i4 @test_bitreverse_i4(i4 %a) {
; X64-NEXT: shrb $4, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+;
+; X86XOP-LABEL: test_bitreverse_i4:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+; X86XOP-NEXT: vmovd %xmm0, %eax
+; X86XOP-NEXT: shrb $4, %al
+; X86XOP-NEXT: # kill: def $al killed $al killed $eax
+; X86XOP-NEXT: retl
%b = call i4 @llvm.bitreverse.i4(i4 %a)
ret i4 %b
}
@@ -404,6 +458,11 @@ define <2 x i16> @fold_v2i16() {
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = <61440,240,u,u,u,u,u,u>
; X64-NEXT: retq
+;
+; X86XOP-LABEL: fold_v2i16:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: vmovaps {{.*#+}} xmm0 = <61440,240,u,u,u,u,u,u>
+; X86XOP-NEXT: retl
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> <i16 15, i16 3840>)
ret <2 x i16> %b
}
@@ -418,6 +477,11 @@ define i24 @fold_i24() {
; X64: # %bb.0:
; X64-NEXT: movl $2048, %eax # imm = 0x800
; X64-NEXT: retq
+;
+; X86XOP-LABEL: fold_i24:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: movl $2048, %eax # imm = 0x800
+; X86XOP-NEXT: retl
%b = call i24 @llvm.bitreverse.i24(i24 4096)
ret i24 %b
}
@@ -432,6 +496,11 @@ define i8 @fold_i8() {
; X64: # %bb.0:
; X64-NEXT: movb $-16, %al
; X64-NEXT: retq
+;
+; X86XOP-LABEL: fold_i8:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: movb $-16, %al
+; X86XOP-NEXT: retl
%b = call i8 @llvm.bitreverse.i8(i8 15)
ret i8 %b
}
@@ -446,6 +515,11 @@ define i4 @fold_i4() {
; X64: # %bb.0:
; X64-NEXT: movb $1, %al
; X64-NEXT: retq
+;
+; X86XOP-LABEL: fold_i4:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: movb $1, %al
+; X86XOP-NEXT: retl
%b = call i4 @llvm.bitreverse.i4(i4 8)
ret i4 %b
}
@@ -463,6 +537,11 @@ define i8 @identity_i8(i8 %a) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+;
+; X86XOP-LABEL: identity_i8:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86XOP-NEXT: retl
%b = call i8 @llvm.bitreverse.i8(i8 %a)
%c = call i8 @llvm.bitreverse.i8(i8 %b)
ret i8 %c
@@ -478,6 +557,10 @@ define <2 x i16> @identity_v2i16(<2 x i16> %a) {
; X64-LABEL: identity_v2i16:
; X64: # %bb.0:
; X64-NEXT: retq
+;
+; X86XOP-LABEL: identity_v2i16:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: retl
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
%c = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %b)
ret <2 x i16> %c
@@ -493,6 +576,10 @@ define i8 @undef_i8() {
; X64-LABEL: undef_i8:
; X64: # %bb.0:
; X64-NEXT: retq
+;
+; X86XOP-LABEL: undef_i8:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: retl
%b = call i8 @llvm.bitreverse.i8(i8 undef)
ret i8 %b
}
@@ -505,6 +592,10 @@ define <2 x i16> @undef_v2i16() {
; X64-LABEL: undef_v2i16:
; X64: # %bb.0:
; X64-NEXT: retq
+;
+; X86XOP-LABEL: undef_v2i16:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: retl
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
ret <2 x i16> %b
}
@@ -1122,6 +1213,113 @@ define i528 @large_promotion(i528 %A) nounwind {
; X64-NEXT: popq %r15
; X64-NEXT: popq %rbp
; X64-NEXT: retq
+;
+; X86XOP-LABEL: large_promotion:
+; X86XOP: # %bb.0:
+; X86XOP-NEXT: pushl %ebp
+; X86XOP-NEXT: pushl %ebx
+; X86XOP-NEXT: pushl %edi
+; X86XOP-NEXT: pushl %esi
+; X86XOP-NEXT: subl $44, %esp
+; X86XOP-NEXT: vmovdqa {{.*#+}} xmm0 = [87,86,85,84,83,82,81,80,95,94,93,92,91,90,89,88]
+; X86XOP-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
+; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+; X86XOP-NEXT: vmovd %xmm1, %ecx
+; X86XOP-NEXT: shrdl $16, %ecx, %eax
+; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
+; X86XOP-NEXT: shrdl $16, %eax, %ecx
+; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+; X86XOP-NEXT: vmovd %xmm1, %ecx
+; X86XOP-NEXT: shrdl $16, %ecx, %eax
+; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
+; X86XOP-NEXT: shrdl $16, %eax, %ecx
+; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+; X86XOP-NEXT: vmovd %xmm1, %ecx
+; X86XOP-NEXT: shrdl $16, %ecx, %eax
+; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
+; X86XOP-NEXT: shrdl $16, %eax, %ecx
+; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+; X86XOP-NEXT: vmovd %xmm1, %ecx
+; X86XOP-NEXT: shrdl $16, %ecx, %eax
+; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
+; X86XOP-NEXT: shrdl $16, %eax, %ecx
+; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+; X86XOP-NEXT: vmovd %xmm1, %ecx
+; X86XOP-NEXT: shrdl $16, %ecx, %eax
+; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
+; X86XOP-NEXT: shrdl $16, %eax, %ecx
+; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+; X86XOP-NEXT: vmovd %xmm1, %ebp
+; X86XOP-NEXT: shrdl $16, %ebp, %eax
+; X86XOP-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86XOP-NEXT: vpextrd $1, %xmm1, %ebx
+; X86XOP-NEXT: shrdl $16, %ebx, %ebp
+; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+; X86XOP-NEXT: vmovd %xmm1, %esi
+; X86XOP-NEXT: shrdl $16, %esi, %ebx
+; X86XOP-NEXT: vpextrd $1, %xmm1, %edx
+; X86XOP-NEXT: shrdl $16, %edx, %esi
+; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm0
+; X86XOP-NEXT: vmovd %xmm0, %ecx
+; X86XOP-NEXT: shrdl $16, %ecx, %edx
+; X86XOP-NEXT: vpextrd $1, %xmm0, %edi
+; X86XOP-NEXT: shrdl $16, %edi, %ecx
+; X86XOP-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86XOP-NEXT: movl %ecx, 60(%eax)
+; X86XOP-NEXT: movl %edx, 56(%eax)
+; X86XOP-NEXT: movl %esi, 52(%eax)
+; X86XOP-NEXT: movl %ebx, 48(%eax)
+; X86XOP-NEXT: movl %ebp, 44(%eax)
+; X86XOP-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86XOP-NEXT: movl %ecx, 40(%eax)
+; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86XOP-NEXT: movl %ecx, 36(%eax)
+; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86XOP-NEXT: movl %ecx, 32(%eax)
+; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86XOP-NEXT: movl %ecx, 28(%eax)
+; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86XOP-NEXT: movl %ecx, 24(%eax)
+; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86XOP-NEXT: movl %ecx, 20(%eax)
+; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86XOP-NEXT: movl %ecx, 16(%eax)
+; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86XOP-NEXT: movl %ecx, 12(%eax)
+; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86XOP-NEXT: movl %ecx, 8(%eax)
+; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86XOP-NEXT: movl %ecx, 4(%eax)
+; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86XOP-NEXT: movl %ecx, (%eax)
+; X86XOP-NEXT: shrl $16, %edi
+; X86XOP-NEXT: movw %di, 64(%eax)
+; X86XOP-NEXT: addl $44, %esp
+; X86XOP-NEXT: popl %esi
+; X86XOP-NEXT: popl %edi
+; X86XOP-NEXT: popl %ebx
+; X86XOP-NEXT: popl %ebp
+; X86XOP-NEXT: retl $4
%Z = call i528 @llvm.bitreverse.i528(i528 %A)
ret i528 %Z
}
More information about the llvm-branch-commits
mailing list