[llvm] 99b0078 - [AArch64] Tests for showing MachineCombiner COPY patterns. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon May 30 02:47:51 PDT 2022
Author: David Green
Date: 2022-05-30T10:47:44+01:00
New Revision: 99b007806420f8fc5cc9db7f4b71ba511fedb686
URL: https://github.com/llvm/llvm-project/commit/99b007806420f8fc5cc9db7f4b71ba511fedb686
DIFF: https://github.com/llvm/llvm-project/commit/99b007806420f8fc5cc9db7f4b71ba511fedb686.diff
LOG: [AArch64] Tests for showing MachineCombiner COPY patterns. NFC
Added:
llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
Modified:
llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
new file mode 100644
index 000000000000..27767435eec2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -mattr=+fullfp16 -O3 | FileCheck %s
+; fma_dup_f16: C[i] += A[i] * B over %n half elements, as a 16-elements-per-iteration vector loop followed by a scalar loop for whatever is left (or for everything when %n < 16).
+define void @fma_dup_f16(ptr noalias nocapture noundef readonly %A, half noundef %B, ptr noalias nocapture noundef %C, i32 noundef %n) {
+; CHECK-LABEL: fma_dup_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0
+; CHECK-NEXT: cbz w2, .LBB0_8
+; CHECK-NEXT: // %bb.1: // %for.body.preheader
+; CHECK-NEXT: mov w8, w2
+; CHECK-NEXT: cmp w2, #15
+; CHECK-NEXT: b.hi .LBB0_3
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: b .LBB0_6
+; CHECK-NEXT: .LBB0_3: // %vector.ph
+; CHECK-NEXT: and x9, x8, #0xfffffff0
+; CHECK-NEXT: add x10, x1, #16
+; CHECK-NEXT: add x11, x0, #16
+; CHECK-NEXT: mov x12, x9
+; CHECK-NEXT: dup v1.8h, v0.h[0]
+; CHECK-NEXT: .LBB0_4: // %vector.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldp q2, q3, [x11, #-16]
+; CHECK-NEXT: subs x12, x12, #16
+; CHECK-NEXT: add x11, x11, #32
+; CHECK-NEXT: ldp q4, q5, [x10, #-16]
+; CHECK-NEXT: fmla v4.8h, v2.8h, v1.8h
+; CHECK-NEXT: fmla v5.8h, v3.8h, v0.h[0]
+; CHECK-NEXT: stp q4, q5, [x10, #-16]
+; CHECK-NEXT: add x10, x10, #32
+; CHECK-NEXT: b.ne .LBB0_4
+; CHECK-NEXT: // %bb.5: // %middle.block
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: b.eq .LBB0_8
+; CHECK-NEXT: .LBB0_6: // %for.body.preheader1
+; CHECK-NEXT: lsl x10, x9, #1
+; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: add x9, x1, x10
+; CHECK-NEXT: add x10, x0, x10
+; CHECK-NEXT: .LBB0_7: // %for.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr h1, [x10], #2
+; CHECK-NEXT: ldr h2, [x9]
+; CHECK-NEXT: subs x8, x8, #1
+; CHECK-NEXT: fmadd h1, h1, h0, h2
+; CHECK-NEXT: str h1, [x9], #2
+; CHECK-NEXT: b.ne .LBB0_7
+; CHECK-NEXT: .LBB0_8: // %for.cond.cleanup
+; CHECK-NEXT: ret
+entry:
+ %cmp6.not = icmp eq i32 %n, 0 ; nothing to do when %n is zero
+ br i1 %cmp6.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %wide.trip.count = zext i32 %n to i64
+ %min.iters.check = icmp ult i32 %n, 16 ; fewer than 16 elements: go straight to the scalar loop
+ br i1 %min.iters.check, label %for.body.preheader14, label %vector.ph
+
+vector.ph: ; preds = %for.body.preheader
+ %n.vec = and i64 %wide.trip.count, 4294967280 ; 4294967280 == 0xfffffff0: trip count rounded down to a multiple of 16
+ %broadcast.splatinsert = insertelement <8 x half> poison, half %B, i64 0
+ %broadcast.splat = shufflevector <8 x half> %broadcast.splatinsert, <8 x half> poison, <8 x i32> zeroinitializer ; %B in all 8 lanes
+ %broadcast.splatinsert10 = insertelement <8 x half> poison, half %B, i64 0
+ %broadcast.splat11 = shufflevector <8 x half> %broadcast.splatinsert10, <8 x half> poison, <8 x i32> zeroinitializer ; second, identical splat of %B, used by the other fmul
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds half, ptr %A, i64 %index
+ %wide.load = load <8 x half>, ptr %0, align 2 ; A[index .. index+7]
+ %1 = getelementptr inbounds half, ptr %0, i64 8
+ %wide.load9 = load <8 x half>, ptr %1, align 2 ; A[index+8 .. index+15]
+ %2 = fmul fast <8 x half> %wide.load, %broadcast.splat ; expected output above folds this into the by-vector fmla (v1.8h, the dup result)
+ %3 = fmul fast <8 x half> %wide.load9, %broadcast.splat11 ; expected output above folds this into the by-element fmla (v0.h[0])
+ %4 = getelementptr inbounds half, ptr %C, i64 %index
+ %wide.load12 = load <8 x half>, ptr %4, align 2 ; C[index .. index+7]
+ %5 = getelementptr inbounds half, ptr %4, i64 8
+ %wide.load13 = load <8 x half>, ptr %5, align 2 ; C[index+8 .. index+15]
+ %6 = fadd fast <8 x half> %wide.load12, %2
+ %7 = fadd fast <8 x half> %wide.load13, %3
+ store <8 x half> %6, ptr %4, align 2
+ store <8 x half> %7, ptr %5, align 2
+ %index.next = add nuw i64 %index, 16 ; two <8 x half> vectors per iteration
+ %8 = icmp eq i64 %index.next, %n.vec
+ br i1 %8, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body
+ %cmp.n = icmp eq i64 %n.vec, %wide.trip.count ; no leftover elements when %n is a multiple of 16
+ br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader14
+
+for.body.preheader14: ; preds = %for.body.preheader, %middle.block
+ %indvars.iv.ph = phi i64 [ 0, %for.body.preheader ], [ %n.vec, %middle.block ] ; scalar loop starts at 0 or where the vector loop stopped
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %middle.block, %entry
+ ret void
+
+for.body: ; preds = %for.body.preheader14, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader14 ]
+ %arrayidx = getelementptr inbounds half, ptr %A, i64 %indvars.iv
+ %9 = load half, ptr %arrayidx, align 2
+ %mul = fmul fast half %9, %B ; scalar A[i] * B
+ %arrayidx2 = getelementptr inbounds half, ptr %C, i64 %indvars.iv
+ %10 = load half, ptr %arrayidx2, align 2
+ %add = fadd fast half %10, %mul ; C[i] + A[i] * B
+ store half %add, ptr %arrayidx2, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir
index 4de93fca36f1..a2a28419a872 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir
@@ -95,6 +95,10 @@
br label %for.cond
}
+ define void @extracopy(<2 x float> %shuf, <2 x float> %mu, <2 x float> %ad, <2 x float>* %ret) #0 {
+ unreachable ; placeholder body only; the code under test is the MIR function named extracopy later in this file
+ }
+
attributes #0 = { "target-cpu"="cortex-a57" }
...
@@ -545,3 +549,71 @@ body: |
B %bb.1
...
+---
+name: extracopy # FMULv2f32 in bb.1 whose second operand is a COPY of a DUPv2i32lane result defined in bb.0
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr64 }
+ - { id: 1, class: fpr64 }
+ - { id: 2, class: fpr64 }
+ - { id: 3, class: fpr64 }
+ - { id: 4, class: gpr64common }
+ - { id: 5, class: fpr64 }
+ - { id: 6, class: fpr64 }
+ - { id: 7, class: fpr128 }
+ - { id: 8, class: fpr128 }
+ - { id: 9, class: fpr64 }
+ - { id: 10, class: fpr64 }
+ - { id: 11, class: fpr64 }
+liveins:
+ - { reg: '$d0', virtual-reg: '%1' }
+ - { reg: '$d1', virtual-reg: '%2' }
+ - { reg: '$d2', virtual-reg: '%3' }
+ - { reg: '$x0', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: extracopy
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: liveins: $d0, $d1, $d2, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $d1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY3]], %subreg.dsub
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr64 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:fpr64 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane killed [[INSERT_SUBREG]], 0
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:fpr64 = COPY [[DUPv2i32lane]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: [[FMULv2f32_:%[0-9]+]]:fpr64 = FMULv2f32 [[COPY5]], [[COPY6]]
+ ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2f32_]], [[COPY4]]
+ ; CHECK-NEXT: STRDui killed [[FADDv2f32_]], [[COPY]], 0 :: (store (s64), align 16)
+ ; CHECK-NEXT: B %bb.1
+ bb.0:
+ liveins: $d0, $d1, $d2, $x0
+
+ %4:gpr64common = COPY $x0
+ %3:fpr64 = COPY $d2
+ %2:fpr64 = COPY $d1
+ %1:fpr64 = COPY $d0
+ %8:fpr128 = IMPLICIT_DEF
+ %7:fpr128 = INSERT_SUBREG %8, %1, %subreg.dsub ; place %1 ($d0) in the dsub subregister of an otherwise undefined fpr128
+ %6:fpr64 = COPY %3
+ %5:fpr64 = COPY %2
+ %11:fpr64 = DUPv2i32lane killed %7, 0 ; duplicate lane 0 of %7
+ %0:fpr64 = COPY %11 ; the extra COPY between the DUP and its user; NOTE(review): presumably what keeps the FMUL/DUP pair from being combined today - confirm
+
+ bb.1:
+ %9:fpr64 = FMULv2f32 %5, %0 ; multiplies by the copied DUP result; the expected output above keeps this FMULv2f32 unchanged
+ %10:fpr64 = FADDv2f32 killed %9, %6
+ STRDui killed %10, %4, 0 :: (store 8, align 16) ; size given in bytes here; the expected output prints it as (s64)
+ B %bb.1 ; bb.1 branches back to itself
+
+...
More information about the llvm-commits mailing list