[llvm] [AArch64][GlobalISel] FNeg constant materialization (PR #80643)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 5 00:07:03 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
This is the GlobalISel equivalent of #<!-- -->80641: it materializes the constant as fneg(movi) instead of falling back to the alternatives, a constant pool load or a GPR dup.
---
Full diff: https://github.com/llvm/llvm-project/pull/80643.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp (+49-16)
- (modified) llvm/test/CodeGen/AArch64/neon-mov.ll (+10-10)
``````````diff
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 2515991fbea11..f96b6f5781479 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5786,24 +5786,57 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
if (CV->getSplatValue()) {
APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
- MachineInstr *NewOp;
- bool Inv = false;
- if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
- (NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
- (NewOp =
- tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
- (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
- (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
- (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
- return NewOp;
+ auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr* {
+ MachineInstr *NewOp;
+ bool Inv = false;
+ if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
+ (NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
+ (NewOp =
+ tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
+ (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
+ (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
+ (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
+ return NewOp;
+
+ DefBits = ~DefBits;
+ Inv = true;
+ if ((NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
+ (NewOp =
+ tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
+ (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
+ return NewOp;
+ return nullptr;
+ };
- DefBits = ~DefBits;
- Inv = true;
- if ((NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
- (NewOp =
- tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
- (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
+ if (auto *NewOp = TryMOVIWithBits(DefBits))
return NewOp;
+
+ // See if a fneg of the constant can be materialized with a MOVI, etc
+ auto TryWithFNeg = [&](APInt DefBits, int NumBits,
+ unsigned NegOpc) -> MachineInstr * {
+ // FNegate each sub-element of the constant
+ APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
+ APInt NegBits(DstSize, 0);
+ unsigned NumElts = DstSize / NumBits;
+ for (unsigned i = 0; i < NumElts; i++)
+ NegBits |= Neg << (NumBits * i);
+ NegBits = DefBits ^ NegBits;
+
+ // Try to create the new constants with MOVI, and if so generate a fneg
+ // for it.
+ if (auto *NewOp = TryMOVIWithBits(NegBits)) {
+ Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
+ NewOp->getOperand(0).setReg(NewDst);
+ return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
+ }
+ return nullptr;
+ };
+ MachineInstr *R;
+ if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
+ (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
+ (STI.hasFullFP16() &&
+ (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
+ return R;
}
auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll
index 219c8b53243e6..e7b0296565196 100644
--- a/llvm/test/CodeGen/AArch64/neon-mov.ll
+++ b/llvm/test/CodeGen/AArch64/neon-mov.ll
@@ -125,14 +125,14 @@ define <4 x i32> @movi4s_fneg() {
;
; CHECK-NOFP16-GI-LABEL: movi4s_fneg:
; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI13_0
-; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
+; CHECK-NOFP16-GI-NEXT: movi v0.4s, #240, lsl #8
+; CHECK-NOFP16-GI-NEXT: fneg v0.4s, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: movi4s_fneg:
; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI13_0
-; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
+; CHECK-FP16-GI-NEXT: movi v0.4s, #240, lsl #8
+; CHECK-FP16-GI-NEXT: fneg v0.4s, v0.4s
; CHECK-FP16-GI-NEXT: ret
ret <4 x i32> <i32 2147545088, i32 2147545088, i32 2147545088, i32 2147545088>
}
@@ -306,8 +306,8 @@ define <8 x i16> @mvni8h_neg() {
;
; CHECK-FP16-GI-LABEL: mvni8h_neg:
; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI32_0
-; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0]
+; CHECK-FP16-GI-NEXT: movi v0.8h, #240
+; CHECK-FP16-GI-NEXT: fneg v0.8h, v0.8h
; CHECK-FP16-GI-NEXT: ret
ret <8 x i16> <i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008>
}
@@ -492,14 +492,14 @@ define <2 x double> @fmov2d_neg0() {
;
; CHECK-NOFP16-GI-LABEL: fmov2d_neg0:
; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI51_0
-; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI51_0]
+; CHECK-NOFP16-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NOFP16-GI-NEXT: fneg v0.2d, v0.2d
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: fmov2d_neg0:
; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI51_0
-; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI51_0]
+; CHECK-FP16-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-FP16-GI-NEXT: fneg v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: ret
ret <2 x double> <double -0.0, double -0.0>
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/80643
More information about the llvm-commits
mailing list