[llvm] 5c7957d - [AArch64] Allow i16->f64 uitofp tbl shuffles
David Green via llvm-commits
llvm-commits@lists.llvm.org
Wed Sep 11 14:21:57 PDT 2024
Author: David Green
Date: 2024-09-11T22:21:52+01:00
New Revision: 5c7957dd4f12e7c9128068c5ed92464cdc59947e
URL: https://github.com/llvm/llvm-project/commit/5c7957dd4f12e7c9128068c5ed92464cdc59947e
DIFF: https://github.com/llvm/llvm-project/commit/5c7957dd4f12e7c9128068c5ed92464cdc59947e.diff
LOG: [AArch64] Allow i16->f64 uitofp tbl shuffles
Just as we convert i8->f32 uitofp into a tbl shuffle to perform the zext part of
the conversion, we can do the same for i16->f64.
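For reference, the rewrite done by optimizeExtendOrTruncateConversion looks
roughly like the hand-written sketch below. This is illustrative only, not the
exact IR the pass emits: it assumes a little-endian target and uses a narrow
<2 x i16> input (%x) for brevity, whereas the tests added here exercise wider
<8 x i16> vectors. The zext is expressed as an i16 shuffle that interleaves
zero elements, which later lowers to tbl, so only the final i64->f64 step needs
ucvtf:

  ; before
  %fp = uitofp <2 x i16> %x to <2 x double>

  ; after (sketch): zext via shuffle with a zeroed element, then convert
  %zeros = insertelement <2 x i16> poison, i16 0, i64 0
  %wide  = shufflevector <2 x i16> %x, <2 x i16> %zeros,
             <8 x i32> <i32 0, i32 2, i32 2, i32 2, i32 1, i32 2, i32 2, i32 2>
  %ints  = bitcast <8 x i16> %wide to <2 x i64>
  %fp2   = uitofp <2 x i64> %ints to <2 x double>

At ISel the shuffle becomes a tbl.16b and the remaining conversion a ucvtf.2d,
matching the CHECK lines in the new tests below.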
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 516d0cf33aaeb0..47da9d577cd827 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16615,7 +16615,7 @@ bool AArch64TargetLowering::shouldSinkOperands(
static bool createTblShuffleMask(unsigned SrcWidth, unsigned DstWidth,
unsigned NumElts, bool IsLittleEndian,
SmallVectorImpl<int> &Mask) {
- if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth >= 64)
+ if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth > 64)
return false;
assert(DstWidth % SrcWidth == 0 &&
@@ -16649,7 +16649,7 @@ static Value *createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op,
return nullptr;
auto *FirstEltZero = Builder.CreateInsertElement(
- PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0));
+ PoisonValue::get(SrcTy), Builder.getIntN(SrcWidth, 0), uint64_t(0));
Value *Result = Builder.CreateShuffleVector(Op, FirstEltZero, Mask);
Result = Builder.CreateBitCast(Result, DstTy);
if (DstTy != ZExtTy)
@@ -16670,7 +16670,7 @@ static Value *createTblShuffleForSExt(IRBuilderBase &Builder, Value *Op,
return nullptr;
auto *FirstEltZero = Builder.CreateInsertElement(
- PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0));
+ PoisonValue::get(SrcTy), Builder.getIntN(SrcWidth, 0), uint64_t(0));
return Builder.CreateShuffleVector(Op, FirstEltZero, Mask);
}
@@ -16847,6 +16847,9 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
return false;
}
+ if (DstTy->getScalarSizeInBits() >= 64)
+ return false;
+
IRBuilder<> Builder(ZExt);
Value *Result = createTblShuffleForZExt(
Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()),
@@ -16859,8 +16862,10 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
}
auto *UIToFP = dyn_cast<UIToFPInst>(I);
- if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
- DstTy->getElementType()->isFloatTy()) {
+ if (UIToFP && ((SrcTy->getElementType()->isIntegerTy(8) &&
+ DstTy->getElementType()->isFloatTy()) ||
+ (SrcTy->getElementType()->isIntegerTy(16) &&
+ DstTy->getElementType()->isDoubleTy()))) {
IRBuilder<> Builder(I);
Value *ZExt = createTblShuffleForZExt(
Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy),
diff --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
index 0a3b9a070c2b32..d9d80f1cb50ee1 100644
--- a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
@@ -648,3 +648,178 @@ loop:
exit:
ret void
}
+
+define void @uitofp_v8i16_to_v8f64(ptr nocapture noundef readonly %x, ptr nocapture noundef writeonly %y, i32 noundef %n) {
+; CHECK-LABEL: uitofp_v8i16_to_v8f64:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: Lloh22:
+; CHECK-NEXT: adrp x8, lCPI10_0@PAGE
+; CHECK-NEXT: Lloh23:
+; CHECK-NEXT: adrp x9, lCPI10_1@PAGE
+; CHECK-NEXT: Lloh24:
+; CHECK-NEXT: adrp x10, lCPI10_2@PAGE
+; CHECK-NEXT: Lloh25:
+; CHECK-NEXT: ldr q0, [x8, lCPI10_0@PAGEOFF]
+; CHECK-NEXT: Lloh26:
+; CHECK-NEXT: adrp x8, lCPI10_3@PAGE
+; CHECK-NEXT: Lloh27:
+; CHECK-NEXT: ldr q1, [x9, lCPI10_1@PAGEOFF]
+; CHECK-NEXT: Lloh28:
+; CHECK-NEXT: ldr q2, [x10, lCPI10_2@PAGEOFF]
+; CHECK-NEXT: Lloh29:
+; CHECK-NEXT: ldr q3, [x8, lCPI10_3@PAGEOFF]
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: LBB10_1: ; %vector.body
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr q4, [x0, x8]
+; CHECK-NEXT: add x9, x1, x8
+; CHECK-NEXT: add x8, x8, #64
+; CHECK-NEXT: cmp x8, #2, lsl #12 ; =8192
+; CHECK-NEXT: tbl.16b v5, { v4 }, v0
+; CHECK-NEXT: tbl.16b v6, { v4 }, v1
+; CHECK-NEXT: tbl.16b v7, { v4 }, v2
+; CHECK-NEXT: tbl.16b v4, { v4 }, v3
+; CHECK-NEXT: ucvtf.2d v5, v5
+; CHECK-NEXT: ucvtf.2d v6, v6
+; CHECK-NEXT: ucvtf.2d v7, v7
+; CHECK-NEXT: ucvtf.2d v4, v4
+; CHECK-NEXT: stp q6, q5, [x9, #32]
+; CHECK-NEXT: stp q4, q7, [x9]
+; CHECK-NEXT: b.ne LBB10_1
+; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup
+; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh26, Lloh29
+; CHECK-NEXT: .loh AdrpLdr Lloh24, Lloh28
+; CHECK-NEXT: .loh AdrpLdr Lloh23, Lloh27
+; CHECK-NEXT: .loh AdrpAdrp Lloh22, Lloh26
+; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh25
+entry:
+ br label %vector.body
+
+vector.body:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+ %.idx = shl nsw i64 %index, 3
+ %g = getelementptr inbounds i8, ptr %x, i64 %.idx
+ %wide.vec = load <8 x i16>, ptr %g, align 2
+ %u = uitofp <8 x i16> %wide.vec to <8 x double>
+ %h = getelementptr inbounds double, ptr %y, i64 %index
+ store <8 x double> %u, ptr %h, align 8
+ %index.next = add nuw i64 %index, 8
+ %c = icmp eq i64 %index.next, 1024
+ br i1 %c, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @uitofp_ld4_v32i16_to_v8f64(ptr nocapture noundef readonly %x, ptr nocapture noundef writeonly %y, i32 noundef %n) {
+; CHECK-LABEL: uitofp_ld4_v32i16_to_v8f64:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: Lloh30:
+; CHECK-NEXT: adrp x8, lCPI11_0@PAGE
+; CHECK-NEXT: Lloh31:
+; CHECK-NEXT: adrp x9, lCPI11_1@PAGE
+; CHECK-NEXT: Lloh32:
+; CHECK-NEXT: adrp x10, lCPI11_2@PAGE
+; CHECK-NEXT: Lloh33:
+; CHECK-NEXT: ldr q0, [x8, lCPI11_0@PAGEOFF]
+; CHECK-NEXT: Lloh34:
+; CHECK-NEXT: adrp x8, lCPI11_3@PAGE
+; CHECK-NEXT: Lloh35:
+; CHECK-NEXT: ldr q1, [x9, lCPI11_1@PAGEOFF]
+; CHECK-NEXT: Lloh36:
+; CHECK-NEXT: ldr q2, [x10, lCPI11_2@PAGEOFF]
+; CHECK-NEXT: Lloh37:
+; CHECK-NEXT: ldr q3, [x8, lCPI11_3@PAGEOFF]
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: LBB11_1: ; %vector.body
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: add x9, x0, x8
+; CHECK-NEXT: ldp q5, q4, [x9, #32]
+; CHECK-NEXT: ldp q7, q6, [x9]
+; CHECK-NEXT: add x9, x1, x8
+; CHECK-NEXT: add x8, x8, #64
+; CHECK-NEXT: tbl.16b v16, { v4 }, v0
+; CHECK-NEXT: tbl.16b v17, { v5 }, v0
+; CHECK-NEXT: tbl.16b v21, { v4 }, v1
+; CHECK-NEXT: tbl.16b v18, { v6 }, v0
+; CHECK-NEXT: tbl.16b v19, { v7 }, v0
+; CHECK-NEXT: tbl.16b v20, { v7 }, v1
+; CHECK-NEXT: tbl.16b v22, { v5 }, v1
+; CHECK-NEXT: tbl.16b v23, { v5 }, v2
+; CHECK-NEXT: tbl.16b v24, { v4 }, v2
+; CHECK-NEXT: tbl.16b v25, { v7 }, v2
+; CHECK-NEXT: tbl.16b v5, { v5 }, v3
+; CHECK-NEXT: tbl.16b v4, { v4 }, v3
+; CHECK-NEXT: tbl.16b v7, { v7 }, v3
+; CHECK-NEXT: tbl.16b v26, { v6 }, v1
+; CHECK-NEXT: tbl.16b v27, { v6 }, v2
+; CHECK-NEXT: tbl.16b v6, { v6 }, v3
+; CHECK-NEXT: ucvtf.2d v17, v17
+; CHECK-NEXT: ucvtf.2d v16, v16
+; CHECK-NEXT: ucvtf.2d v19, v19
+; CHECK-NEXT: ucvtf.2d v18, v18
+; CHECK-NEXT: ucvtf.2d v22, v22
+; CHECK-NEXT: ucvtf.2d v23, v23
+; CHECK-NEXT: ucvtf.2d v5, v5
+; CHECK-NEXT: ucvtf.2d v21, v21
+; CHECK-NEXT: ucvtf.2d v24, v24
+; CHECK-NEXT: ucvtf.2d v4, v4
+; CHECK-NEXT: cmp x8, #2, lsl #12 ; =8192
+; CHECK-NEXT: ucvtf.2d v20, v20
+; CHECK-NEXT: ucvtf.2d v25, v25
+; CHECK-NEXT: ucvtf.2d v7, v7
+; CHECK-NEXT: ucvtf.2d v26, v26
+; CHECK-NEXT: ucvtf.2d v27, v27
+; CHECK-NEXT: ucvtf.2d v6, v6
+; CHECK-NEXT: fadd.2d v17, v22, v17
+; CHECK-NEXT: fadd.2d v5, v23, v5
+; CHECK-NEXT: fadd.2d v16, v21, v16
+; CHECK-NEXT: fadd.2d v4, v24, v4
+; CHECK-NEXT: fadd.2d v19, v20, v19
+; CHECK-NEXT: fadd.2d v7, v25, v7
+; CHECK-NEXT: fadd.2d v18, v26, v18
+; CHECK-NEXT: fadd.2d v6, v27, v6
+; CHECK-NEXT: fadd.2d v5, v17, v5
+; CHECK-NEXT: fadd.2d v4, v16, v4
+; CHECK-NEXT: fadd.2d v7, v19, v7
+; CHECK-NEXT: fadd.2d v6, v18, v6
+; CHECK-NEXT: stp q5, q4, [x9, #32]
+; CHECK-NEXT: stp q7, q6, [x9]
+; CHECK-NEXT: b.ne LBB11_1
+; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup
+; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh34, Lloh37
+; CHECK-NEXT: .loh AdrpLdr Lloh32, Lloh36
+; CHECK-NEXT: .loh AdrpLdr Lloh31, Lloh35
+; CHECK-NEXT: .loh AdrpAdrp Lloh30, Lloh34
+; CHECK-NEXT: .loh AdrpLdr Lloh30, Lloh33
+entry:
+ br label %vector.body
+
+vector.body:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+ %.idx = shl nsw i64 %index, 3
+ %0 = getelementptr inbounds i8, ptr %x, i64 %.idx
+ %wide.vec = load <32 x i16>, ptr %0, align 2
+ %strided.vec = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+ %strided.vec36 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+ %strided.vec37 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+ %strided.vec38 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+ %1 = uitofp <8 x i16> %strided.vec to <8 x double>
+ %2 = uitofp <8 x i16> %strided.vec36 to <8 x double>
+ %3 = fadd fast <8 x double> %2, %1
+ %4 = uitofp <8 x i16> %strided.vec37 to <8 x double>
+ %5 = fadd fast <8 x double> %3, %4
+ %6 = uitofp <8 x i16> %strided.vec38 to <8 x double>
+ %7 = fadd fast <8 x double> %5, %6
+ %8 = getelementptr inbounds double, ptr %y, i64 %index
+ store <8 x double> %7, ptr %8, align 8
+ %index.next = add nuw i64 %index, 8
+ %9 = icmp eq i64 %index.next, 1024
+ br i1 %9, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:
+ ret void
+}
+