[llvm] Enable Custom Lowering for fabs.v8f16 on AVX (PR #71730)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 8 12:09:06 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: David Li (david-xl)
<details>
<summary>Changes</summary>
[X86]: Enable custom lowering for fabs.v8f16 on AVX
Currently, custom lowering of fabs.v8f16 requires AVX512FP16, which is too restrictive: lowering fabs for v8f16 does not need any AVX512FP16 instructions. Without this fix, horribly inefficient code is generated when AVX512FP16 is not available. Note that InstCombine generates calls to the intrinsic @llvm.fabs.v8f16 when simplifying AND operations on <8 x half>.
---
Full diff: https://github.com/llvm/llvm-project/pull/71730.diff
2 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+3)
- (modified) llvm/test/CodeGen/X86/vec_fabs.ll (+41)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 22fba5601ccfd38..b3b5a0c1b68ec82 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2238,6 +2238,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
+ if (!Subtarget.useSoftFloat() && Subtarget.hasAVX())
+ setOperationAction(ISD::FABS, MVT::v8f16, Custom);
+
if (!Subtarget.useSoftFloat() &&
(Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) {
addRegisterClass(MVT::v8bf16, Subtarget.hasAVX512() ? &X86::VR128XRegClass
diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll
index 982062d8907542a..08364449ab1a378 100644
--- a/llvm/test/CodeGen/X86/vec_fabs.ll
+++ b/llvm/test/CodeGen/X86/vec_fabs.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX512VL
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX512VLDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VLDQ
@@ -111,6 +113,45 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) {
}
declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
+define <8 x half> @fabs_v8f16(ptr %p) {
+; X86-AVX-LABEL: fabs_v8f16:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: movl 4(%esp), [[ADDRREG:%.*]]
+; X86-AVX-NEXT: vmovaps ([[ADDRREG]]), %xmm0
+; X86-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX-NEXT: retl
+
+; X86-AVX2-LABEL: fabs_v8f16:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: movl 4(%esp), [[REG:%.*]]
+; X86-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-AVX2-NEXT: vpand ([[REG]]), %xmm0, %xmm0
+; X86-AVX2-NEXT: retl
+
+; X64-AVX512VL-LABEL: fabs_v8f16:
+; X64-AVX512VL: # %bb.0:
+; X64-AVX512VL-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-AVX512VL-NEXT: vpand (%rdi), %xmm0, %xmm0
+; X64-AVX512VL-NEXT: retq
+
+; X64-AVX-LABEL: fabs_v8f16:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
+; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: retq
+
+; X64-AVX2-LABEL: fabs_v8f16:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0
+; X64-AVX2-NEXT: retq
+
+ %v = load <8 x half>, ptr %p, align 16
+ %nnv = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v)
+ ret <8 x half> %nnv
+}
+declare <8 x half> @llvm.fabs.v8f16(<8 x half> %p)
+
define <8 x float> @fabs_v8f32(<8 x float> %p) {
; X86-AVX-LABEL: fabs_v8f32:
; X86-AVX: # %bb.0:
``````````
</details>
https://github.com/llvm/llvm-project/pull/71730
More information about the llvm-commits
mailing list