[llvm] 5871f18 - [AArch64] Lower extending uitofp using tbl.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 16 02:20:55 PDT 2022


Author: Florian Hahn
Date: 2022-09-16T10:20:25+01:00
New Revision: 5871f1882792aebcf5d374713bf435d92201ce09

URL: https://github.com/llvm/llvm-project/commit/5871f1882792aebcf5d374713bf435d92201ce09
DIFF: https://github.com/llvm/llvm-project/commit/5871f1882792aebcf5d374713bf435d92201ce09.diff

LOG: [AArch64] Lower extending uitofp using tbl.

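On AArch64, a uitofp from a vector of i8 to a vector of float can be lowered
more efficiently by performing the zero-extend as a separate step first, since
the zero-extend can then be lowered using tbl. This builds on D120571.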

https://alive2.llvm.org/ce/z/8Je595

Depends on D120571

Reviewed By: t.p.northover

Differential Revision: https://reviews.llvm.org/D133494

Added: 
    

Modified: 
    llvm/lib/CodeGen/CodeGenPrepare.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 9e77f795ea8ce..45416edb3011c 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -8047,6 +8047,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
     if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
       return true;
 
+    if (isa<UIToFPInst>(I) && TLI->optimizeExtendOrTruncateConversion(
+                                  I, LI->getLoopFor(I->getParent())))
+      return true;
+
     if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
       /// Sink a zext or sext into its user blocks if the target type doesn't
       /// fit in one register

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0de2645aa23f2..26fbcc71a5556 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13235,6 +13235,21 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(Instruction *I,
     createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
     return true;
   }
+
+  auto *UIToFP = dyn_cast<UIToFPInst>(I);
+  if (UIToFP &&
+      (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
+      SrcTy->getElementType()->isIntegerTy(8) &&
+      DstTy->getElementType()->isFloatTy()) {
+    IRBuilder<> Builder(I);
+    auto *ZExt = cast<ZExtInst>(
+        Builder.CreateZExt(I->getOperand(0), VectorType::getInteger(DstTy)));
+    auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
+    I->replaceAllUsesWith(UI);
+    I->eraseFromParent();
+    createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
+    return true;
+  }
   return false;
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
index 8ee68a219d6c7..83f40b2faa1dc 100644
--- a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
@@ -386,28 +386,69 @@ exit:
   ret void
 }
 
+; CHECK-LABEL: lCPI8_0:
+; CHECK-NEXT:   .byte   4                               ; 0x4
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   5                               ; 0x5
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   6                               ; 0x6
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   7                               ; 0x7
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT: lCPI8_1:
+; CHECK-NEXT:   .byte   0                               ; 0x0
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   1                               ; 0x1
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   2                               ; 0x2
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   3                               ; 0x3
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+; CHECK-NEXT:   .byte   255                             ; 0xff
+
 define void @uitofp_v8i8_to_v8f32(ptr %src, ptr %dst) {
 ; CHECK-LABEL: uitofp_v8i8_to_v8f32:
 ; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:  Lloh2:
+; CHECK-NEXT:    adrp x9, lCPI8_0@PAGE
+; CHECK-NEXT:  Lloh3:
+; CHECK-NEXT:    adrp x10, lCPI8_1@PAGE
 ; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:  Lloh4:
+; CHECK-NEXT:    ldr q0, [x9, lCPI8_0@PAGEOFF]
+; CHECK-NEXT:  Lloh5:
+; CHECK-NEXT:    ldr q1, [x10, lCPI8_1@PAGEOFF]
 ; CHECK-NEXT:  LBB8_1: ; %loop
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldr d0, [x0, x8, lsl #3]
+; CHECK-NEXT:    ldr d2, [x0, x8, lsl #3]
 ; CHECK-NEXT:    add x9, x1, x8, lsl #5
 ; CHECK-NEXT:    add x8, x8, #1
 ; CHECK-NEXT:    cmp x8, #1000
-; CHECK-NEXT:    zip1.8b v1, v0, v0
-; CHECK-NEXT:    zip2.8b v0, v0, v0
-; CHECK-NEXT:    bic.4h v1, #255, lsl #8
-; CHECK-NEXT:    bic.4h v0, #255, lsl #8
-; CHECK-NEXT:    ushll.4s v0, v0, #0
-; CHECK-NEXT:    ushll.4s v1, v1, #0
-; CHECK-NEXT:    ucvtf.4s v0, v0
-; CHECK-NEXT:    ucvtf.4s v1, v1
-; CHECK-NEXT:    stp q1, q0, [x9]
+; CHECK-NEXT:    tbl.16b v3, { v2 }, v0
+; CHECK-NEXT:    tbl.16b v2, { v2 }, v1
+; CHECK-NEXT:    ucvtf.4s v3, v3
+; CHECK-NEXT:    ucvtf.4s v2, v2
+; CHECK-NEXT:    stp q2, q3, [x9]
 ; CHECK-NEXT:    b.eq LBB8_1
 ; CHECK-NEXT:  ; %bb.2: ; %exit
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:    .loh AdrpLdr Lloh3, Lloh5
+; CHECK-NEXT:    .loh AdrpLdr Lloh2, Lloh4
 entry:
   br label %loop
 
@@ -426,38 +467,118 @@ exit:
   ret void
 }
 
+; CHECK-LABEL: lCPI9_0:
+; CHECK-NEXT:     .byte   12                              ; 0xc
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   13                              ; 0xd
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   14                              ; 0xe
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   15                              ; 0xf
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT: lCPI9_1:
+; CHECK-NEXT:     .byte   8                               ; 0x8
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   9                               ; 0x9
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   10                              ; 0xa
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   11                              ; 0xb
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT: lCPI9_2:
+; CHECK-NEXT:     .byte   4                               ; 0x4
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   5                               ; 0x5
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   6                               ; 0x6
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   7                               ; 0x7
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT: lCPI9_3:
+; CHECK-NEXT:     .byte   0                               ; 0x0
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   1                               ; 0x1
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   2                               ; 0x2
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   3                               ; 0x3
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+; CHECK-NEXT:     .byte   255                             ; 0xff
+
 define void @uitofp_v16i8_to_v16f32(ptr %src, ptr %dst) {
 ; CHECK-LABEL: uitofp_v16i8_to_v16f32:
 ; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:  Lloh6:
+; CHECK-NEXT:    adrp x9, lCPI9_0@PAGE
+; CHECK-NEXT:  Lloh7:
+; CHECK-NEXT:    adrp x10, lCPI9_1@PAGE
+; CHECK-NEXT:  Lloh8:
+; CHECK-NEXT:    adrp x11, lCPI9_2@PAGE
+; CHECK-NEXT:  Lloh9:
+; CHECK-NEXT:    adrp x12, lCPI9_3@PAGE
 ; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:  Lloh10:
+; CHECK-NEXT:    ldr q0, [x9, lCPI9_0@PAGEOFF]
+; CHECK-NEXT:  Lloh11:
+; CHECK-NEXT:    ldr q1, [x10, lCPI9_1@PAGEOFF]
+; CHECK-NEXT:  Lloh12:
+; CHECK-NEXT:    ldr q2, [x11, lCPI9_2@PAGEOFF]
+; CHECK-NEXT:  Lloh13:
+; CHECK-NEXT:    ldr q3, [x12, lCPI9_3@PAGEOFF]
 ; CHECK-NEXT:  LBB9_1: ; %loop
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldr q0, [x0, x8, lsl #4]
+; CHECK-NEXT:    ldr q4, [x0, x8, lsl #4]
 ; CHECK-NEXT:    add x9, x1, x8, lsl #6
 ; CHECK-NEXT:    add x8, x8, #1
 ; CHECK-NEXT:    cmp x8, #1000
-; CHECK-NEXT:    ext.16b v1, v0, v0, #8
-; CHECK-NEXT:    zip1.8b v2, v0, v0
-; CHECK-NEXT:    zip2.8b v0, v0, v0
-; CHECK-NEXT:    bic.4h v2, #255, lsl #8
-; CHECK-NEXT:    zip1.8b v3, v1, v0
-; CHECK-NEXT:    zip2.8b v1, v1, v0
-; CHECK-NEXT:    bic.4h v0, #255, lsl #8
-; CHECK-NEXT:    ushll.4s v2, v2, #0
-; CHECK-NEXT:    ushll.4s v0, v0, #0
-; CHECK-NEXT:    bic.4h v3, #255, lsl #8
-; CHECK-NEXT:    bic.4h v1, #255, lsl #8
-; CHECK-NEXT:    ucvtf.4s v2, v2
-; CHECK-NEXT:    ushll.4s v1, v1, #0
-; CHECK-NEXT:    ucvtf.4s v0, v0
-; CHECK-NEXT:    ushll.4s v3, v3, #0
-; CHECK-NEXT:    ucvtf.4s v1, v1
-; CHECK-NEXT:    ucvtf.4s v3, v3
-; CHECK-NEXT:    stp q2, q0, [x9]
-; CHECK-NEXT:    stp q3, q1, [x9, #32]
+; CHECK-NEXT:    tbl.16b v5, { v4 }, v0
+; CHECK-NEXT:    tbl.16b v6, { v4 }, v1
+; CHECK-NEXT:    tbl.16b v7, { v4 }, v2
+; CHECK-NEXT:    tbl.16b v4, { v4 }, v3
+; CHECK-NEXT:    ucvtf.4s v5, v5
+; CHECK-NEXT:    ucvtf.4s v6, v6
+; CHECK-NEXT:    ucvtf.4s v7, v7
+; CHECK-NEXT:    ucvtf.4s v4, v4
+; CHECK-NEXT:    stp q6, q5, [x9, #32]
+; CHECK-NEXT:    stp q4, q7, [x9]
 ; CHECK-NEXT:    b.eq LBB9_1
 ; CHECK-NEXT:  ; %bb.2: ; %exit
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:    .loh AdrpLdr Lloh9, Lloh13
+; CHECK-NEXT:    .loh AdrpLdr Lloh8, Lloh12
+; CHECK-NEXT:    .loh AdrpLdr Lloh7, Lloh11
+; CHECK-NEXT:    .loh AdrpLdr Lloh6, Lloh10
 entry:
   br label %loop
 


        


More information about the llvm-commits mailing list