[llvm] 5871f18 - [AArch64] Lower extending uitofp using tbl.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 16 02:20:55 PDT 2022
Author: Florian Hahn
Date: 2022-09-16T10:20:25+01:00
New Revision: 5871f1882792aebcf5d374713bf435d92201ce09
URL: https://github.com/llvm/llvm-project/commit/5871f1882792aebcf5d374713bf435d92201ce09
DIFF: https://github.com/llvm/llvm-project/commit/5871f1882792aebcf5d374713bf435d92201ce09.diff
LOG: [AArch64] Lower extending uitofp using tbl.
On AArch64, doing the zero-extend separately first can be lowered more
efficiently using tbl, building on D120571.
https://alive2.llvm.org/ce/z/8Je595
Depends on D120571
Reviewed By: t.p.northover
Differential Revision: https://reviews.llvm.org/D133494
Added:
Modified:
llvm/lib/CodeGen/CodeGenPrepare.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 9e77f795ea8ce..45416edb3011c 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -8047,6 +8047,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
return true;
+ if (isa<UIToFPInst>(I) && TLI->optimizeExtendOrTruncateConversion(
+ I, LI->getLoopFor(I->getParent())))
+ return true;
+
if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
/// Sink a zext or sext into its user blocks if the target type doesn't
/// fit in one register
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0de2645aa23f2..26fbcc71a5556 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13235,6 +13235,21 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(Instruction *I,
createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
return true;
}
+
+ auto *UIToFP = dyn_cast<UIToFPInst>(I);
+ if (UIToFP &&
+ (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
+ SrcTy->getElementType()->isIntegerTy(8) &&
+ DstTy->getElementType()->isFloatTy()) {
+ IRBuilder<> Builder(I);
+ auto *ZExt = cast<ZExtInst>(
+ Builder.CreateZExt(I->getOperand(0), VectorType::getInteger(DstTy)));
+ auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
+ I->replaceAllUsesWith(UI);
+ I->eraseFromParent();
+ createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
+ return true;
+ }
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
index 8ee68a219d6c7..83f40b2faa1dc 100644
--- a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
@@ -386,28 +386,69 @@ exit:
ret void
}
+; CHECK-LABEL: lCPI8_0:
+; CHECK-NEXT: .byte 4 ; 0x4
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 5 ; 0x5
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 6 ; 0x6
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 7 ; 0x7
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: lCPI8_1:
+; CHECK-NEXT: .byte 0 ; 0x0
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 1 ; 0x1
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 2 ; 0x2
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 3 ; 0x3
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+
define void @uitofp_v8i8_to_v8f32(ptr %src, ptr %dst) {
; CHECK-LABEL: uitofp_v8i8_to_v8f32:
; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: Lloh2:
+; CHECK-NEXT: adrp x9, lCPI8_0@PAGE
+; CHECK-NEXT: Lloh3:
+; CHECK-NEXT: adrp x10, lCPI8_1@PAGE
; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: Lloh4:
+; CHECK-NEXT: ldr q0, [x9, lCPI8_0@PAGEOFF]
+; CHECK-NEXT: Lloh5:
+; CHECK-NEXT: ldr q1, [x10, lCPI8_1@PAGEOFF]
; CHECK-NEXT: LBB8_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d0, [x0, x8, lsl #3]
+; CHECK-NEXT: ldr d2, [x0, x8, lsl #3]
; CHECK-NEXT: add x9, x1, x8, lsl #5
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
-; CHECK-NEXT: zip1.8b v1, v0, v0
-; CHECK-NEXT: zip2.8b v0, v0, v0
-; CHECK-NEXT: bic.4h v1, #255, lsl #8
-; CHECK-NEXT: bic.4h v0, #255, lsl #8
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: ushll.4s v1, v1, #0
-; CHECK-NEXT: ucvtf.4s v0, v0
-; CHECK-NEXT: ucvtf.4s v1, v1
-; CHECK-NEXT: stp q1, q0, [x9]
+; CHECK-NEXT: tbl.16b v3, { v2 }, v0
+; CHECK-NEXT: tbl.16b v2, { v2 }, v1
+; CHECK-NEXT: ucvtf.4s v3, v3
+; CHECK-NEXT: ucvtf.4s v2, v2
+; CHECK-NEXT: stp q2, q3, [x9]
; CHECK-NEXT: b.eq LBB8_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh5
+; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh4
entry:
br label %loop
@@ -426,38 +467,118 @@ exit:
ret void
}
+; CHECK-LABEL: lCPI9_0:
+; CHECK-NEXT: .byte 12 ; 0xc
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 13 ; 0xd
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 14 ; 0xe
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 15 ; 0xf
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: lCPI9_1:
+; CHECK-NEXT: .byte 8 ; 0x8
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 9 ; 0x9
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 10 ; 0xa
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 11 ; 0xb
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: lCPI9_2:
+; CHECK-NEXT: .byte 4 ; 0x4
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 5 ; 0x5
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 6 ; 0x6
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 7 ; 0x7
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: lCPI9_3:
+; CHECK-NEXT: .byte 0 ; 0x0
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 1 ; 0x1
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 2 ; 0x2
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 3 ; 0x3
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+; CHECK-NEXT: .byte 255 ; 0xff
+
define void @uitofp_v16i8_to_v16f32(ptr %src, ptr %dst) {
; CHECK-LABEL: uitofp_v16i8_to_v16f32:
; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: Lloh6:
+; CHECK-NEXT: adrp x9, lCPI9_0@PAGE
+; CHECK-NEXT: Lloh7:
+; CHECK-NEXT: adrp x10, lCPI9_1@PAGE
+; CHECK-NEXT: Lloh8:
+; CHECK-NEXT: adrp x11, lCPI9_2@PAGE
+; CHECK-NEXT: Lloh9:
+; CHECK-NEXT: adrp x12, lCPI9_3@PAGE
; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: Lloh10:
+; CHECK-NEXT: ldr q0, [x9, lCPI9_0@PAGEOFF]
+; CHECK-NEXT: Lloh11:
+; CHECK-NEXT: ldr q1, [x10, lCPI9_1@PAGEOFF]
+; CHECK-NEXT: Lloh12:
+; CHECK-NEXT: ldr q2, [x11, lCPI9_2@PAGEOFF]
+; CHECK-NEXT: Lloh13:
+; CHECK-NEXT: ldr q3, [x12, lCPI9_3@PAGEOFF]
; CHECK-NEXT: LBB9_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q0, [x0, x8, lsl #4]
+; CHECK-NEXT: ldr q4, [x0, x8, lsl #4]
; CHECK-NEXT: add x9, x1, x8, lsl #6
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
-; CHECK-NEXT: ext.16b v1, v0, v0, #8
-; CHECK-NEXT: zip1.8b v2, v0, v0
-; CHECK-NEXT: zip2.8b v0, v0, v0
-; CHECK-NEXT: bic.4h v2, #255, lsl #8
-; CHECK-NEXT: zip1.8b v3, v1, v0
-; CHECK-NEXT: zip2.8b v1, v1, v0
-; CHECK-NEXT: bic.4h v0, #255, lsl #8
-; CHECK-NEXT: ushll.4s v2, v2, #0
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: bic.4h v3, #255, lsl #8
-; CHECK-NEXT: bic.4h v1, #255, lsl #8
-; CHECK-NEXT: ucvtf.4s v2, v2
-; CHECK-NEXT: ushll.4s v1, v1, #0
-; CHECK-NEXT: ucvtf.4s v0, v0
-; CHECK-NEXT: ushll.4s v3, v3, #0
-; CHECK-NEXT: ucvtf.4s v1, v1
-; CHECK-NEXT: ucvtf.4s v3, v3
-; CHECK-NEXT: stp q2, q0, [x9]
-; CHECK-NEXT: stp q3, q1, [x9, #32]
+; CHECK-NEXT: tbl.16b v5, { v4 }, v0
+; CHECK-NEXT: tbl.16b v6, { v4 }, v1
+; CHECK-NEXT: tbl.16b v7, { v4 }, v2
+; CHECK-NEXT: tbl.16b v4, { v4 }, v3
+; CHECK-NEXT: ucvtf.4s v5, v5
+; CHECK-NEXT: ucvtf.4s v6, v6
+; CHECK-NEXT: ucvtf.4s v7, v7
+; CHECK-NEXT: ucvtf.4s v4, v4
+; CHECK-NEXT: stp q6, q5, [x9, #32]
+; CHECK-NEXT: stp q4, q7, [x9]
; CHECK-NEXT: b.eq LBB9_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh9, Lloh13
+; CHECK-NEXT: .loh AdrpLdr Lloh8, Lloh12
+; CHECK-NEXT: .loh AdrpLdr Lloh7, Lloh11
+; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh10
entry:
br label %loop
More information about the llvm-commits
mailing list