[llvm] 09dd4d8 - DAG: Remove hasBitPreservingFPLogic
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 14 06:25:32 PST 2023
Author: Matt Arsenault
Date: 2023-02-14T10:25:24-04:00
New Revision: 09dd4d870e192da73594b713bb201859e5a09efb
URL: https://github.com/llvm/llvm-project/commit/09dd4d870e192da73594b713bb201859e5a09efb
DIFF: https://github.com/llvm/llvm-project/commit/09dd4d870e192da73594b713bb201859e5a09efb.diff
LOG: DAG: Remove hasBitPreservingFPLogic
This doesn't make sense as an option: fneg and fabs are
bit-preserving by definition. If a target has an fneg or fabs
instruction that is not bit-preserving, it is incorrect to lower
fneg/fabs to use it.
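For reference, the DAG combine involved (foldBitcastedFPLogic) matches integer
logic applied to a bitcast FP value, as in this minimal IR sketch (the same
shape as the bitcast_fabs tests updated below; the function name is
illustrative):

  define float @bitcast_fabs(float %x) {
    %bc  = bitcast float %x to i32
    %and = and i32 %bc, 2147483647   ; clear only the sign bit (0x7FFFFFFF mask)
    %res = bitcast i32 %and to float
    ret float %res
  }

With the hook removed, the fold is gated on operation legality instead of an
explicit target opt-in (see the DAGCombiner change below).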
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.h
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/SystemZ/SystemZISelLowering.h
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/test/CodeGen/AMDGPU/fneg.ll
llvm/test/CodeGen/PowerPC/fabs.ll
llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll
llvm/test/CodeGen/X86/fp128-i128.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 567121933da1f..bbb854a937bda 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -644,14 +644,6 @@ class TargetLoweringBase {
/// gen prepare.
virtual bool preferZeroCompareBranch() const { return false; }
- /// Return true if it is safe to transform an integer-domain bitwise operation
- /// into the equivalent floating-point operation. This should be set to true
- /// if the target has IEEE-754-compliant fabs/fneg operations for the input
- /// type.
- virtual bool hasBitPreservingFPLogic(EVT VT) const {
- return false;
- }
-
/// Return true if it is cheaper to split the store of a merged int val
/// from a pair of smaller values into multiple stores.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 65faaae95acd3..cd9eceb21536c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -585,6 +585,9 @@ namespace {
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI);
+
SDValue CombineExtLoad(SDNode *N);
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
@@ -14399,18 +14402,19 @@ static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
return DAG.getDataLayout().isBigEndian() ? 1 : 0;
}
-static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
// If this is not a bitcast to an FP type or if the target doesn't have
// IEEE754-compliant FP logic, we're done.
EVT VT = N->getValueType(0);
- if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
+ SDValue N0 = N->getOperand(0);
+ EVT SourceVT = N0.getValueType();
+
+ if (!VT.isFloatingPoint())
return SDValue();
// TODO: Handle cases where the integer constant is a different scalar
// bitwidth to the FP.
- SDValue N0 = N->getOperand(0);
- EVT SourceVT = N0.getValueType();
if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
return SDValue();
@@ -14433,6 +14437,9 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+ if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
+ return SDValue();
+
// This needs to be the inverse of logic in foldSignChangeInBitcast.
// FIXME: I don't think looking for bitcast intrinsically makes sense, but
// removing this would require more changes.
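The same fold also covers fneg and fneg(fabs), i.e. an xor or an or of the
sign bit on the bitcast integer value. A minimal IR sketch of the fneg case
(illustrative function name; the same shape as the bitcast_fneg test in
avx512fp16-fp-logic.ll below, written here for double). After legalization the
combine above now only fires when TLI.isOperationLegal(FPOpcode, VT) holds,
rather than consulting the removed hook:

  define double @bitcast_fneg(double %x) {
    %bc  = bitcast double %x to i64
    %xor = xor i64 %bc, -9223372036854775808   ; flip only the sign bit (0x8000000000000000)
    %res = bitcast i64 %xor to double
    ret double %res
  }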
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 95b9c4d3a0a13..163e736718596 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -840,11 +840,6 @@ class AArch64TargetLowering : public TargetLowering {
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
Value *Accumulator = nullptr) const override;
- bool hasBitPreservingFPLogic(EVT VT) const override {
- // FIXME: Is this always true? It should be true for vectors at least.
- return VT == MVT::f32 || VT == MVT::f64;
- }
-
bool supportSplitCSR(MachineFunction *MF) const override {
return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 7c8e0c952ac74..85dca42483941 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4482,10 +4482,6 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
}
-bool SITargetLowering::hasBitPreservingFPLogic(EVT VT) const {
- return isTypeLegal(VT.getScalarType());
-}
-
bool SITargetLowering::hasAtomicFaddRtnForTy(SDValue &Op) const {
switch (Op.getValue(0).getSimpleValueType().SimpleTy) {
case MVT::f32:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 3b2c58108667f..74985c6c625e9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -396,7 +396,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const override;
- bool hasBitPreservingFPLogic(EVT VT) const override;
bool hasAtomicFaddRtnForTy(SDValue &Op) const;
bool enableAggressiveFMAFusion(EVT VT) const override;
bool enableAggressiveFMAFusion(LLT Ty) const override;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index bd0cbd15bc267..e0ec64a00ae1c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17638,15 +17638,6 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}
-bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
- if (!Subtarget.hasVSX())
- return false;
- if (Subtarget.hasP9Vector() && VT == MVT::f128)
- return true;
- return VT == MVT::f32 || VT == MVT::f64 ||
- VT == MVT::v4f32 || VT == MVT::v2f64;
-}
-
bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
const Value *Mask = AndI.getOperand(1);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index b80479427c2e4..2f71758b0d465 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1468,7 +1468,6 @@ namespace llvm {
// tail call. This will cause the optimizers to attempt to move, or
// duplicate return instructions to help enable tail call optimizations.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
- bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
/// getAddrModeForFlags - Based on the set of address flags, select the most
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5a04f11c36efc..95b60cedcd1c1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1525,12 +1525,6 @@ bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
return Index == 0 || Index == ResElts;
}
-bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
- return (VT == MVT::f16 && Subtarget.hasStdExtZfhOrZfhmin()) ||
- (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
- (VT == MVT::f64 && Subtarget.hasStdExtD());
-}
-
MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 4d73c3035cead..e625c97af90aa 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -409,7 +409,6 @@ class RISCVTargetLowering : public TargetLowering {
/// should be stack expanded.
bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
- bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
// If the pair to store is a mixture of float and int values, we will
// save two bitwise instructions and one float-to-int instruction and
@@ -425,6 +424,7 @@ class RISCVTargetLowering : public TargetLowering {
// out until we get testcase to prove it is a win.
return false;
}
+
bool
shouldExpandBuildVectorWithShuffles(EVT VT,
unsigned DefinedValues) const override;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 87d2eb1b5a54b..e9b47fc4160b3 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -1,3 +1,4 @@
+
//===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -423,10 +424,6 @@ class SystemZTargetLowering : public TargetLowering {
}
bool isCheapToSpeculateCtlz(Type *) const override { return true; }
bool preferZeroCompareBranch() const override { return true; }
- bool hasBitPreservingFPLogic(EVT VT) const override {
- EVT ScVT = VT.getScalarType();
- return ScVT == MVT::f32 || ScVT == MVT::f64 || ScVT == MVT::f128;
- }
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override {
ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
return Mask && Mask->getValue().isIntN(16);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 62265f3bd63ed..88665860a24f0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6001,10 +6001,6 @@ bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
return Subtarget.hasLZCNT();
}
-bool X86TargetLowering::hasBitPreservingFPLogic(EVT VT) const {
- return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
-}
-
bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {
// Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
// expensive than a straight movsd. On the other hand, it's important to
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 191c8764e4743..c9fd51c5ff548 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1088,8 +1088,6 @@ namespace llvm {
bool isCtlzFast() const override;
- bool hasBitPreservingFPLogic(EVT VT) const override;
-
bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
// If the pair to store is a mixture of float and int values, we will
// save two bitwise instructions and one float-to-int instruction and
diff --git a/llvm/test/CodeGen/AMDGPU/fneg.ll b/llvm/test/CodeGen/AMDGPU/fneg.ll
index fdee6a27c0343..7d7162165f9ed 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg.ll
@@ -188,8 +188,8 @@ define i16 @v_fneg_i16(i16 %in) {
}
; FUNC-LABEL: {{^}}s_fneg_i16_fp_use:
-; SI: v_cvt_f32_f16_e64 [[CVT0:v[0-9]+]], -s{{[0-9]+}}
-; SI: v_add_f32_e32 [[ADD:v[0-9]+]], 2.0, [[CVT0]]
+; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], s{{[0-9]+}}
+; SI: v_sub_f32_e32 [[ADD:v[0-9]+]], 2.0, [[CVT0]]
; SI: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], [[ADD]]
; VI: s_load_dword [[IN:s[0-9]+]]
@@ -204,8 +204,8 @@ define amdgpu_kernel void @s_fneg_i16_fp_use(ptr addrspace(1) %out, i16 %in) {
; FUNC-LABEL: {{^}}v_fneg_i16_fp_use:
; SI: s_waitcnt
-; SI-NEXT: v_cvt_f32_f16_e64 v0, -v0
-; SI-NEXT: v_add_f32_e32 v0, 2.0, v0
+; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT: v_sub_f32_e32 v0, 2.0, v0
; SI-NEXT: s_setpc_b64
; VI: s_waitcnt
@@ -257,8 +257,10 @@ define <2 x i16> @v_fneg_v2i16(<2 x i16> %in) {
; FUNC-LABEL: {{^}}s_fneg_v2i16_fp_use:
; SI: s_lshr_b32 s3, s2, 16
-; SI: v_cvt_f32_f16_e64 v0, -s3
-; SI: v_cvt_f32_f16_e64 v1, -s2
+; SI: v_cvt_f32_f16_e32 v0, s3
+; SI: v_cvt_f32_f16_e32 v1, s2
+; SI: v_sub_f32_e32 v0, 2.0, v0
+; SI: v_sub_f32_e32 v1, 2.0, v1
; VI: s_lshr_b32 s5, s4, 16
; VI: s_xor_b32 s5, s5, 0x8000
@@ -278,10 +280,10 @@ define amdgpu_kernel void @s_fneg_v2i16_fp_use(ptr addrspace(1) %out, i32 %arg)
; FUNC-LABEL: {{^}}v_fneg_v2i16_fp_use:
; SI: v_lshrrev_b32_e32 v1, 16, v0
-; SI: v_cvt_f32_f16_e64 v0, -v0
-; SI: v_cvt_f32_f16_e64 v1, -v1
-; SI: v_add_f32_e32 v0, 2.0, v0
-; SI: v_add_f32_e32 v1, 2.0, v1
+; SI: v_cvt_f32_f16_e32 v0, v0
+; SI: v_cvt_f32_f16_e32 v1, v1
+; SI: v_sub_f32_e32 v0, 2.0, v0
+; SI: v_sub_f32_e32 v1, 2.0, v1
; VI: s_waitcnt
; VI: v_mov_b32_e32 v1, 0x4000
diff --git a/llvm/test/CodeGen/PowerPC/fabs.ll b/llvm/test/CodeGen/PowerPC/fabs.ll
index 65f3926ca0055..6821d827c6f2d 100644
--- a/llvm/test/CodeGen/PowerPC/fabs.ll
+++ b/llvm/test/CodeGen/PowerPC/fabs.ll
@@ -13,12 +13,7 @@ define double @fabs(double %f) {
define float @bitcast_fabs(float %x) {
; CHECK-LABEL: bitcast_fabs:
; CHECK: # %bb.0:
-; CHECK: stfs f1, 8(r1)
-; CHECK: lwz r3, 8(r1)
-; CHECK-NEXT: clrlwi r3, r3, 1
-; CHECK-NEXT: stw r3, 12(r1)
-; CHECK-NEXT: lfs f1, 12(r1)
-; CHECK-NEXT: addi r1, r1, 16
+; CHECK-NEXT: fabs f1, f1
; CHECK-NEXT: blr
;
%bc1 = bitcast float %x to i32
diff --git a/llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll b/llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll
index 3b8d6edf04d33..e2ea8974f6551 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll
@@ -211,8 +211,8 @@ define half @movmsk(half %x) {
define half @bitcast_fabs(half %x) {
; CHECK-LABEL: bitcast_fabs:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
+; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%bc1 = bitcast half %x to i16
%and = and i16 %bc1, 32767
@@ -223,8 +223,8 @@ define half @bitcast_fabs(half %x) {
define half @bitcast_fneg(half %x) {
; CHECK-LABEL: bitcast_fneg:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%bc1 = bitcast half %x to i16
%xor = xor i16 %bc1, 32768
@@ -285,8 +285,8 @@ define half @fsub_bitcast_fneg(half %x, half %y) {
define half @nabs(half %a) {
; CHECK-LABEL: nabs:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%conv = bitcast half %a to i16
%and = or i16 %conv, -32768
diff --git a/llvm/test/CodeGen/X86/fp128-i128.ll b/llvm/test/CodeGen/X86/fp128-i128.ll
index 155ecf42c0918..b9e6803686621 100644
--- a/llvm/test/CodeGen/X86/fp128-i128.ll
+++ b/llvm/test/CodeGen/X86/fp128-i128.ll
@@ -130,14 +130,8 @@ entry:
define fp128 @TestI128_1(fp128 %x) #0 {
; SSE-LABEL: TestI128_1:
; SSE: # %bb.0: # %entry
-; SSE-NEXT: subq $40, %rsp
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
-; SSE-NEXT: andq {{[0-9]+}}(%rsp), %rax
-; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rcx
-; SSE-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movq %rcx, (%rsp)
-; SSE-NEXT: movaps (%rsp), %xmm0
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: callq __lttf2@PLT
; SSE-NEXT: xorl %ecx, %ecx
@@ -145,19 +139,13 @@ define fp128 @TestI128_1(fp128 %x) #0 {
; SSE-NEXT: sets %cl
; SSE-NEXT: shlq $4, %rcx
; SSE-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rcx), %xmm0
-; SSE-NEXT: addq $40, %rsp
+; SSE-NEXT: popq %rax
; SSE-NEXT: retq
;
; AVX-LABEL: TestI128_1:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: subq $40, %rsp
-; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp)
-; AVX-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
-; AVX-NEXT: andq {{[0-9]+}}(%rsp), %rax
-; AVX-NEXT: movq {{[0-9]+}}(%rsp), %rcx
-; AVX-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; AVX-NEXT: movq %rcx, (%rsp)
-; AVX-NEXT: vmovaps (%rsp), %xmm0
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vmovaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; AVX-NEXT: callq __lttf2@PLT
; AVX-NEXT: xorl %ecx, %ecx
@@ -165,7 +153,7 @@ define fp128 @TestI128_1(fp128 %x) #0 {
; AVX-NEXT: sets %cl
; AVX-NEXT: shlq $4, %rcx
; AVX-NEXT: vmovaps {{\.?LCPI[0-9]+_[0-9]+}}(%rcx), %xmm0
-; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: popq %rax
; AVX-NEXT: retq
entry:
%0 = bitcast fp128 %x to i128