[llvm] d0742ac - [X86][CodeGen]Fix extract f16 from big vectors
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 10 18:19:20 PST 2023
Author: Wang, Xin10
Date: 2023-01-10T18:18:50-08:00
New Revision: d0742ac2e531e3dc38ca22b200059cfaef85d838
URL: https://github.com/llvm/llvm-project/commit/d0742ac2e531e3dc38ca22b200059cfaef85d838
DIFF: https://github.com/llvm/llvm-project/commit/d0742ac2e531e3dc38ca22b200059cfaef85d838.diff
LOG: [X86][CodeGen]Fix extract f16 from big vectors
When compiling the following test case with llc -mattr=+avx512fp16, llc crashes:
```
define half @test(<64 x half> %x, i64 %idx){
%res = extractelement <64 x half> %x, i64 %idx
ret half %res
}
```
The root cause is that when avx512fp16 is enabled, we lose the custom handler
for extracting f16 from big vectors that are not loaded from a pointer.
Reviewed By: pengfei
Differential Revision: https://reviews.llvm.org/D141348
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx512fp16-mov.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 58d8da16401c..bed3e94f9dde 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34113,7 +34113,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(V);
return;
}
- case ISD::BITREVERSE:
+ case ISD::BITREVERSE: {
assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
assert(Subtarget.hasXOP() && "Expected XOP");
// We can use VPPERM by copying to a vector register and back. We'll need
@@ -34121,6 +34121,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG));
return;
}
+ case ISD::EXTRACT_VECTOR_ELT: {
+ // f16 = extract vXf16 %vec, i64 %idx
+ assert(N->getSimpleValueType(0) == MVT::f16 &&
+ "Unexpected Value type of EXTRACT_VECTOR_ELT!");
+ assert(Subtarget.hasFP16() && "Expected FP16");
+ SDValue VecOp = N->getOperand(0);
+ EVT ExtVT = VecOp.getValueType().changeVectorElementTypeToInteger();
+ SDValue Split = DAG.getBitcast(ExtVT, N->getOperand(0));
+ Split = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Split,
+ N->getOperand(1));
+ Split = DAG.getBitcast(MVT::f16, Split);
+ Results.push_back(Split);
+ return;
+ }
+ }
}
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
index 09706f07d5c5..9cdb47b4a21f 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -1349,6 +1349,74 @@ define half @extract_f16_7(<8 x half> %x) {
ret half %res
}
+define half @extract_f16_8(<32 x half> %x, i64 %idx) nounwind {
+; X64-LABEL: extract_f16_8:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: movq %rsp, %rbp
+; X64-NEXT: andq $-64, %rsp
+; X64-NEXT: subq $128, %rsp
+; X64-NEXT: andl $31, %edi
+; X64-NEXT: vmovaps %zmm0, (%rsp)
+; X64-NEXT: vmovsh (%rsp,%rdi,2), %xmm0
+; X64-NEXT: movq %rbp, %rsp
+; X64-NEXT: popq %rbp
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+;
+; X86-LABEL: extract_f16_8:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-64, %esp
+; X86-NEXT: subl $128, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: andl $31, %eax
+; X86-NEXT: vmovaps %zmm0, (%esp)
+; X86-NEXT: vmovsh (%esp,%eax,2), %xmm0
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+ %res = extractelement <32 x half> %x, i64 %idx
+ ret half %res
+}
+
+define half @extract_f16_9(<64 x half> %x, i64 %idx) nounwind {
+; X64-LABEL: extract_f16_9:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: movq %rsp, %rbp
+; X64-NEXT: andq $-64, %rsp
+; X64-NEXT: subq $192, %rsp
+; X64-NEXT: andl $63, %edi
+; X64-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
+; X64-NEXT: vmovaps %zmm0, (%rsp)
+; X64-NEXT: vmovsh (%rsp,%rdi,2), %xmm0
+; X64-NEXT: movq %rbp, %rsp
+; X64-NEXT: popq %rbp
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+;
+; X86-LABEL: extract_f16_9:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: andl $-64, %esp
+; X86-NEXT: subl $192, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: andl $63, %eax
+; X86-NEXT: vmovaps %zmm1, {{[0-9]+}}(%esp)
+; X86-NEXT: vmovaps %zmm0, (%esp)
+; X86-NEXT: vmovsh (%esp,%eax,2), %xmm0
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+ %res = extractelement <64 x half> %x, i64 %idx
+ ret half %res
+}
+
define i16 @extract_i16_0(<8 x i16> %x) {
; CHECK-LABEL: extract_i16_0:
; CHECK: # %bb.0:
@@ -1985,10 +2053,10 @@ define void @pr52560(i8 %0, <2 x i16> %1, ptr %c) nounwind {
; X64-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: vmovw %xmm0, %eax
; X64-NEXT: testw %ax, %ax
-; X64-NEXT: je .LBB121_2
+; X64-NEXT: je .LBB123_2
; X64-NEXT: # %bb.1: # %for.body.preheader
; X64-NEXT: movb $0, (%rsi)
-; X64-NEXT: .LBB121_2: # %for.end
+; X64-NEXT: .LBB123_2: # %for.end
; X64-NEXT: retq
;
; X86-LABEL: pr52560:
@@ -2000,11 +2068,11 @@ define void @pr52560(i8 %0, <2 x i16> %1, ptr %c) nounwind {
; X86-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: vmovw %xmm0, %eax
; X86-NEXT: testw %ax, %ax
-; X86-NEXT: je .LBB121_2
+; X86-NEXT: je .LBB123_2
; X86-NEXT: # %bb.1: # %for.body.preheader
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb $0, (%eax)
-; X86-NEXT: .LBB121_2: # %for.end
+; X86-NEXT: .LBB123_2: # %for.end
; X86-NEXT: retl
entry:
%conv = sext i8 %0 to i16
More information about the llvm-commits
mailing list