[llvm] 13e66c0 - Revert "[ARM] Teach getIntImmCostInst about the cost of saturating fp converts"
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 1 07:25:24 PST 2021
Author: David Green
Date: 2021-12-01T15:25:19Z
New Revision: 13e66c070bdb4edb660b8372791ce52a661ba138
URL: https://github.com/llvm/llvm-project/commit/13e66c070bdb4edb660b8372791ce52a661ba138
DIFF: https://github.com/llvm/llvm-project/commit/13e66c070bdb4edb660b8372791ce52a661ba138.diff
LOG: Revert "[ARM] Teach getIntImmCostInst about the cost of saturating fp converts"
This reverts commit 6d41de380f223c8da02fd4d6a7f7dd1e7a404a24 as the
Windows bots are not happy, in a way I do not understand. Reverting whilst
we figure out what is wrong.
Added:
Modified:
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
llvm/test/CodeGen/ARM/fpclamptosat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 25dd74b11387f..88de84a4fd787 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -334,9 +334,8 @@ InstructionCost ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
}
// Checks whether Inst is part of a min(max()) or max(min()) pattern
-// that will match to an SSAT instruction. Returns the instruction being
-// saturated, or null if no saturation pattern was found.
-static Value *isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
+// that will match to an SSAT instruction
+static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
Value *LHS, *RHS;
ConstantInt *C;
SelectPatternFlavor InstSPF = matchSelectPattern(Inst, LHS, RHS).Flavor;
@@ -359,26 +358,12 @@ static Value *isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
return false;
};
- if (isSSatMin(Inst->getOperand(1)))
- return cast<Instruction>(Inst->getOperand(1))->getOperand(1);
- if (Inst->hasNUses(2) &&
- (isSSatMin(*Inst->user_begin()) || isSSatMin(*(++Inst->user_begin()))))
- return Inst->getOperand(1);
+ if (isSSatMin(Inst->getOperand(1)) ||
+ (Inst->hasNUses(2) && (isSSatMin(*Inst->user_begin()) ||
+ isSSatMin(*(++Inst->user_begin())))))
+ return true;
}
- return nullptr;
-}
-
-// Look for a FP Saturation pattern, where the instruction can be simplified to
-// a fptosi.sat. max(min(fptosi)). The constant in this case is always free.
-static bool isFPSatMinMaxPattern(Instruction *Inst, const APInt &Imm) {
- if (Imm != -2147483648)
- return false;
- Value *FP = isSSATMinMaxPattern(Inst, Imm);
- if (!FP && isa<ICmpInst>(Inst) && Inst->hasOneUse())
- FP = isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm);
- if (!FP)
- return false;
- return isa<FPToSIInst>(FP);
+ return false;
}
InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
@@ -438,9 +423,6 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
return 0;
}
- if (Inst && ST->hasVFP2Base() && isFPSatMinMaxPattern(Inst, Imm))
- return 0;
-
// We can convert <= -1 to < 0, which is generally quite cheap.
if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnesValue()) {
ICmpInst::Predicate Pred = cast<ICmpInst>(Inst)->getPredicate();
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 45b4e72cdc206..d624aa9194957 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -5229,25 +5229,63 @@ define void @unroll_maxmin(i32* nocapture %0, float* nocapture readonly %1, i32
;
; VFP2-LABEL: unroll_maxmin:
; VFP2: @ %bb.0:
-; VFP2-NEXT: subs r1, #8
-; VFP2-NEXT: subs r0, #8
-; VFP2-NEXT: vldr s0, .LCPI54_0
-; VFP2-NEXT: mov.w r2, #1024
+; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
+; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
+; VFP2-NEXT: .pad #4
+; VFP2-NEXT: sub sp, #4
+; VFP2-NEXT: .vsave {d8}
+; VFP2-NEXT: vpush {d8}
+; VFP2-NEXT: sub.w r4, r1, #8
+; VFP2-NEXT: sub.w r5, r0, #8
+; VFP2-NEXT: vldr s16, .LCPI54_0
+; VFP2-NEXT: mov.w r8, #-1
+; VFP2-NEXT: mov.w r9, #-2147483648
+; VFP2-NEXT: mov.w r6, #1024
+; VFP2-NEXT: mvn r7, #-2147483648
; VFP2-NEXT: .LBB54_1: @ =>This Inner Loop Header: Depth=1
-; VFP2-NEXT: vldr s2, [r1, #8]
-; VFP2-NEXT: subs r2, #2
-; VFP2-NEXT: vmul.f32 s2, s2, s0
-; VFP2-NEXT: vcvt.s32.f32 s2, s2
-; VFP2-NEXT: vmov r3, s2
-; VFP2-NEXT: str r3, [r0, #8]!
-; VFP2-NEXT: vldr s2, [r1, #12]
-; VFP2-NEXT: add.w r1, r1, #8
-; VFP2-NEXT: vmul.f32 s2, s2, s0
-; VFP2-NEXT: vcvt.s32.f32 s2, s2
-; VFP2-NEXT: vstr s2, [r0, #4]
+; VFP2-NEXT: vldr s0, [r4, #8]
+; VFP2-NEXT: vmul.f32 s0, s0, s16
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: bl __aeabi_f2lz
+; VFP2-NEXT: subs r2, r0, r7
+; VFP2-NEXT: sbcs r2, r1, #0
+; VFP2-NEXT: mov.w r2, #0
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt r2, #1
+; VFP2-NEXT: cmp r2, #0
+; VFP2-NEXT: ite ne
+; VFP2-NEXT: movne r2, r1
+; VFP2-NEXT: moveq r0, r7
+; VFP2-NEXT: subs.w r1, r9, r0
+; VFP2-NEXT: sbcs.w r1, r8, r2
+; VFP2-NEXT: it ge
+; VFP2-NEXT: movge r0, r9
+; VFP2-NEXT: str r0, [r5, #8]!
+; VFP2-NEXT: vldr s0, [r4, #12]
+; VFP2-NEXT: vmul.f32 s0, s0, s16
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: bl __aeabi_f2lz
+; VFP2-NEXT: subs r2, r0, r7
+; VFP2-NEXT: add.w r4, r4, #8
+; VFP2-NEXT: sbcs r2, r1, #0
+; VFP2-NEXT: mov.w r2, #0
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt r2, #1
+; VFP2-NEXT: cmp r2, #0
+; VFP2-NEXT: ite ne
+; VFP2-NEXT: movne r2, r1
+; VFP2-NEXT: moveq r0, r7
+; VFP2-NEXT: subs.w r1, r9, r0
+; VFP2-NEXT: sbcs.w r1, r8, r2
+; VFP2-NEXT: it ge
+; VFP2-NEXT: movge r0, r9
+; VFP2-NEXT: subs r6, #2
+; VFP2-NEXT: str r0, [r5, #4]
; VFP2-NEXT: bne .LBB54_1
; VFP2-NEXT: @ %bb.2:
-; VFP2-NEXT: bx lr
+; VFP2-NEXT: vpop {d8}
+; VFP2-NEXT: add sp, #4
+; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.3:
; VFP2-NEXT: .LCPI54_0:
@@ -5255,26 +5293,57 @@ define void @unroll_maxmin(i32* nocapture %0, float* nocapture readonly %1, i32
;
; FULL-LABEL: unroll_maxmin:
; FULL: @ %bb.0:
-; FULL-NEXT: .save {r7, lr}
-; FULL-NEXT: push {r7, lr}
-; FULL-NEXT: mov.w lr, #512
-; FULL-NEXT: subs r1, #8
-; FULL-NEXT: subs r0, #8
-; FULL-NEXT: vldr s0, .LCPI54_0
+; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
+; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
+; FULL-NEXT: .pad #4
+; FULL-NEXT: sub sp, #4
+; FULL-NEXT: .vsave {d8}
+; FULL-NEXT: vpush {d8}
+; FULL-NEXT: mov.w r2, #512
+; FULL-NEXT: sub.w r5, r1, #8
+; FULL-NEXT: sub.w r6, r0, #8
+; FULL-NEXT: vldr s16, .LCPI54_0
+; FULL-NEXT: mov r4, r2
+; FULL-NEXT: mov.w r8, #-1
+; FULL-NEXT: mov.w r9, #-2147483648
+; FULL-NEXT: mvn r7, #-2147483648
; FULL-NEXT: .LBB54_1: @ =>This Inner Loop Header: Depth=1
-; FULL-NEXT: vldr s2, [r1, #8]
-; FULL-NEXT: vmul.f32 s2, s2, s0
-; FULL-NEXT: vcvt.s32.f32 s2, s2
-; FULL-NEXT: vmov r2, s2
-; FULL-NEXT: str r2, [r0, #8]!
-; FULL-NEXT: vldr s2, [r1, #12]
-; FULL-NEXT: adds r1, #8
-; FULL-NEXT: vmul.f32 s2, s2, s0
-; FULL-NEXT: vcvt.s32.f32 s2, s2
-; FULL-NEXT: vstr s2, [r0, #4]
-; FULL-NEXT: le lr, .LBB54_1
-; FULL-NEXT: @ %bb.2:
-; FULL-NEXT: pop {r7, pc}
+; FULL-NEXT: vldr s0, [r5, #8]
+; FULL-NEXT: vmul.f32 s0, s0, s16
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: bl __aeabi_f2lz
+; FULL-NEXT: subs r2, r0, r7
+; FULL-NEXT: sbcs r2, r1, #0
+; FULL-NEXT: cset r2, lt
+; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: csel r0, r0, r7, ne
+; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: subs.w r2, r9, r0
+; FULL-NEXT: sbcs.w r1, r8, r1
+; FULL-NEXT: csel r0, r0, r9, lt
+; FULL-NEXT: str r0, [r6, #8]!
+; FULL-NEXT: vldr s0, [r5, #12]
+; FULL-NEXT: vmul.f32 s0, s0, s16
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: bl __aeabi_f2lz
+; FULL-NEXT: subs r2, r0, r7
+; FULL-NEXT: add.w r5, r5, #8
+; FULL-NEXT: sbcs r2, r1, #0
+; FULL-NEXT: sub.w r4, r4, #1
+; FULL-NEXT: cset r2, lt
+; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: csel r0, r0, r7, ne
+; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: subs.w r2, r9, r0
+; FULL-NEXT: sbcs.w r1, r8, r1
+; FULL-NEXT: csel r0, r0, r9, lt
+; FULL-NEXT: str r0, [r6, #4]
+; FULL-NEXT: cbz r4, .LBB54_2
+; FULL-NEXT: le .LBB54_1
+; FULL-NEXT: .LBB54_2:
+; FULL-NEXT: vpop {d8}
+; FULL-NEXT: add sp, #4
+; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
; FULL-NEXT: .p2align 2
; FULL-NEXT: @ %bb.3:
; FULL-NEXT: .LCPI54_0:
@@ -5425,25 +5494,63 @@ define void @unroll_minmax(i32* nocapture %0, float* nocapture readonly %1, i32
;
; VFP2-LABEL: unroll_minmax:
; VFP2: @ %bb.0:
-; VFP2-NEXT: subs r1, #8
-; VFP2-NEXT: subs r0, #8
-; VFP2-NEXT: vldr s0, .LCPI55_0
-; VFP2-NEXT: mov.w r2, #1024
+; VFP2-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
+; VFP2-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
+; VFP2-NEXT: .pad #4
+; VFP2-NEXT: sub sp, #4
+; VFP2-NEXT: .vsave {d8}
+; VFP2-NEXT: vpush {d8}
+; VFP2-NEXT: sub.w r4, r1, #8
+; VFP2-NEXT: sub.w r5, r0, #8
+; VFP2-NEXT: vldr s16, .LCPI55_0
+; VFP2-NEXT: mov.w r8, #-1
+; VFP2-NEXT: mov.w r9, #-2147483648
+; VFP2-NEXT: mov.w r6, #1024
+; VFP2-NEXT: mvn r7, #-2147483648
; VFP2-NEXT: .LBB55_1: @ =>This Inner Loop Header: Depth=1
-; VFP2-NEXT: vldr s2, [r1, #8]
-; VFP2-NEXT: subs r2, #2
-; VFP2-NEXT: vmul.f32 s2, s2, s0
-; VFP2-NEXT: vcvt.s32.f32 s2, s2
-; VFP2-NEXT: vmov r3, s2
-; VFP2-NEXT: str r3, [r0, #8]!
-; VFP2-NEXT: vldr s2, [r1, #12]
-; VFP2-NEXT: add.w r1, r1, #8
-; VFP2-NEXT: vmul.f32 s2, s2, s0
-; VFP2-NEXT: vcvt.s32.f32 s2, s2
-; VFP2-NEXT: vstr s2, [r0, #4]
+; VFP2-NEXT: vldr s0, [r4, #8]
+; VFP2-NEXT: vmul.f32 s0, s0, s16
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: bl __aeabi_f2lz
+; VFP2-NEXT: subs.w r2, r9, r0
+; VFP2-NEXT: sbcs.w r2, r8, r1
+; VFP2-NEXT: mov.w r2, #0
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt r2, #1
+; VFP2-NEXT: cmp r2, #0
+; VFP2-NEXT: itt eq
+; VFP2-NEXT: moveq r1, r8
+; VFP2-NEXT: moveq r0, r9
+; VFP2-NEXT: subs r2, r0, r7
+; VFP2-NEXT: sbcs r1, r1, #0
+; VFP2-NEXT: it ge
+; VFP2-NEXT: movge r0, r7
+; VFP2-NEXT: str r0, [r5, #8]!
+; VFP2-NEXT: vldr s0, [r4, #12]
+; VFP2-NEXT: vmul.f32 s0, s0, s16
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: bl __aeabi_f2lz
+; VFP2-NEXT: subs.w r2, r9, r0
+; VFP2-NEXT: add.w r4, r4, #8
+; VFP2-NEXT: sbcs.w r2, r8, r1
+; VFP2-NEXT: mov.w r2, #0
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt r2, #1
+; VFP2-NEXT: cmp r2, #0
+; VFP2-NEXT: itt eq
+; VFP2-NEXT: moveq r1, r8
+; VFP2-NEXT: moveq r0, r9
+; VFP2-NEXT: subs r2, r0, r7
+; VFP2-NEXT: sbcs r1, r1, #0
+; VFP2-NEXT: it ge
+; VFP2-NEXT: movge r0, r7
+; VFP2-NEXT: subs r6, #2
+; VFP2-NEXT: str r0, [r5, #4]
; VFP2-NEXT: bne .LBB55_1
; VFP2-NEXT: @ %bb.2:
-; VFP2-NEXT: bx lr
+; VFP2-NEXT: vpop {d8}
+; VFP2-NEXT: add sp, #4
+; VFP2-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.3:
; VFP2-NEXT: .LCPI55_0:
@@ -5451,26 +5558,57 @@ define void @unroll_minmax(i32* nocapture %0, float* nocapture readonly %1, i32
;
; FULL-LABEL: unroll_minmax:
; FULL: @ %bb.0:
-; FULL-NEXT: .save {r7, lr}
-; FULL-NEXT: push {r7, lr}
-; FULL-NEXT: mov.w lr, #512
-; FULL-NEXT: subs r1, #8
-; FULL-NEXT: subs r0, #8
-; FULL-NEXT: vldr s0, .LCPI55_0
+; FULL-NEXT: .save {r4, r5, r6, r7, r8, r9, lr}
+; FULL-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
+; FULL-NEXT: .pad #4
+; FULL-NEXT: sub sp, #4
+; FULL-NEXT: .vsave {d8}
+; FULL-NEXT: vpush {d8}
+; FULL-NEXT: mov.w r2, #512
+; FULL-NEXT: sub.w r5, r1, #8
+; FULL-NEXT: sub.w r6, r0, #8
+; FULL-NEXT: vldr s16, .LCPI55_0
+; FULL-NEXT: mov r4, r2
+; FULL-NEXT: mov.w r8, #-1
+; FULL-NEXT: mov.w r9, #-2147483648
+; FULL-NEXT: mvn r7, #-2147483648
; FULL-NEXT: .LBB55_1: @ =>This Inner Loop Header: Depth=1
-; FULL-NEXT: vldr s2, [r1, #8]
-; FULL-NEXT: vmul.f32 s2, s2, s0
-; FULL-NEXT: vcvt.s32.f32 s2, s2
-; FULL-NEXT: vmov r2, s2
-; FULL-NEXT: str r2, [r0, #8]!
-; FULL-NEXT: vldr s2, [r1, #12]
-; FULL-NEXT: adds r1, #8
-; FULL-NEXT: vmul.f32 s2, s2, s0
-; FULL-NEXT: vcvt.s32.f32 s2, s2
-; FULL-NEXT: vstr s2, [r0, #4]
-; FULL-NEXT: le lr, .LBB55_1
-; FULL-NEXT: @ %bb.2:
-; FULL-NEXT: pop {r7, pc}
+; FULL-NEXT: vldr s0, [r5, #8]
+; FULL-NEXT: vmul.f32 s0, s0, s16
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: bl __aeabi_f2lz
+; FULL-NEXT: subs.w r2, r9, r0
+; FULL-NEXT: sbcs.w r2, r8, r1
+; FULL-NEXT: cset r2, lt
+; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: csel r0, r0, r9, ne
+; FULL-NEXT: csel r1, r1, r8, ne
+; FULL-NEXT: subs r2, r0, r7
+; FULL-NEXT: sbcs r1, r1, #0
+; FULL-NEXT: csel r0, r0, r7, lt
+; FULL-NEXT: str r0, [r6, #8]!
+; FULL-NEXT: vldr s0, [r5, #12]
+; FULL-NEXT: vmul.f32 s0, s0, s16
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: bl __aeabi_f2lz
+; FULL-NEXT: subs.w r2, r9, r0
+; FULL-NEXT: add.w r5, r5, #8
+; FULL-NEXT: sbcs.w r2, r8, r1
+; FULL-NEXT: sub.w r4, r4, #1
+; FULL-NEXT: cset r2, lt
+; FULL-NEXT: cmp r2, #0
+; FULL-NEXT: csel r0, r0, r9, ne
+; FULL-NEXT: csel r1, r1, r8, ne
+; FULL-NEXT: subs r2, r0, r7
+; FULL-NEXT: sbcs r1, r1, #0
+; FULL-NEXT: csel r0, r0, r7, lt
+; FULL-NEXT: str r0, [r6, #4]
+; FULL-NEXT: cbz r4, .LBB55_2
+; FULL-NEXT: le .LBB55_1
+; FULL-NEXT: .LBB55_2:
+; FULL-NEXT: vpop {d8}
+; FULL-NEXT: add sp, #4
+; FULL-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
; FULL-NEXT: .p2align 2
; FULL-NEXT: @ %bb.3:
; FULL-NEXT: .LCPI55_0:
More information about the llvm-commits
mailing list