[llvm] 70520e2 - [AArch64] Add test showing reassociation potential.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 9 07:21:17 PST 2023
Author: Florian Hahn
Date: 2023-01-09T15:20:55Z
New Revision: 70520e2f1c5fc48afd578aeb320ec61e05d79813
URL: https://github.com/llvm/llvm-project/commit/70520e2f1c5fc48afd578aeb320ec61e05d79813
DIFF: https://github.com/llvm/llvm-project/commit/70520e2f1c5fc48afd578aeb320ec61e05d79813.diff
LOG: [AArch64] Add test showing reassociation potential.
Add a test case where some ops of a reassociate-able expression are in
an earlier block.
This can appear in practice, e.g. when computing the final reduction
value after vectorization.
Added:
llvm/test/CodeGen/AArch64/machine-combiner-reassociate-ops-in-different-blocks.mir
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate-ops-in-different-blocks.mir b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate-ops-in-different-blocks.mir
new file mode 100644
index 000000000000..c85578ba76f7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate-ops-in-different-blocks.mir
@@ -0,0 +1,159 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=arm64-apple-ios -o - -run-pass=machine-combiner %s | FileCheck %s
+
+--- |
+  define float @reassoicate_different_block(ptr %a, i1 %c) {
+ ret float undef
+ }
+
+  define float @no_reassociate_different_block(ptr %a, i1 %c) {
+ ret float undef
+ }
+
+ declare void @use()
+
+
+...
+# FIXME: Should reassociate the serialized reduction in bb.1 to improve parallelism.
+---
+name: reassoicate_different_block
+alignment: 4
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: reassoicate_different_block
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $x0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
+ ; CHECK-NEXT: [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
+ ; CHECK-NEXT: [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
+ ; CHECK-NEXT: [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4)
+ ; CHECK-NEXT: TBZW [[COPY]], 0, %bb.2
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
+ ; CHECK-NEXT: $s0 = COPY [[FADDPv2i32p]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: $q0 = COPY [[LDRQui]]
+ ; CHECK-NEXT: $q1 = COPY [[LDRQui2]]
+ ; CHECK-NEXT: $q2 = COPY [[LDRQui1]]
+ ; CHECK-NEXT: $q3 = COPY [[LDRQui3]]
+ ; CHECK-NEXT: TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $x0, $w1
+
+ %5:gpr32 = COPY $w1
+ %4:gpr64common = COPY $x0
+ %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
+ %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
+ %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
+ %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4)
+ TBZW %5, 0, %bb.2
+ B %bb.1
+
+ bb.1:
+ %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
+ %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr
+ %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr
+ %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr
+ %10:gpr64all = COPY %9.dsub
+ %12:fpr64 = COPY %10
+ %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr
+ $s0 = COPY %11
+ RET_ReallyLR implicit $s0
+
+ bb.2:
+ $q0 = COPY %0
+ $q1 = COPY %2
+ $q2 = COPY %1
+ $q3 = COPY %3
+ TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
+
+...
+# Reassociation of the reduction in bb.1 is not profitable, because LDRQui3 has a
+# much larger latency than the other loads.
+---
+name: no_reassociate_different_block
+alignment: 4
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: no_reassociate_different_block
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $x0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
+ ; CHECK-NEXT: [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
+ ; CHECK-NEXT: [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
+ ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui [[COPY1]], 8 :: (load (s64))
+ ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64common = LDRXui killed [[LDRXui]], 0 :: (load (s64))
+ ; CHECK-NEXT: [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui killed [[LDRXui1]], 0 :: (load (s128), align 4)
+ ; CHECK-NEXT: TBZW [[COPY]], 0, %bb.2
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
+ ; CHECK-NEXT: $s0 = COPY [[FADDPv2i32p]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: $q0 = COPY [[LDRQui]]
+ ; CHECK-NEXT: $q1 = COPY [[LDRQui2]]
+ ; CHECK-NEXT: $q2 = COPY [[LDRQui1]]
+ ; CHECK-NEXT: $q3 = COPY [[LDRQui3]]
+ ; CHECK-NEXT: TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $x0, $w1
+
+ %5:gpr32 = COPY $w1
+ %4:gpr64common = COPY $x0
+ %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
+ %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
+ %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
+ %6:gpr64common = LDRXui %4, 8 :: (load (s64))
+ %7:gpr64common = LDRXui killed %6, 0 :: (load (s64))
+ %3:fpr128 = LDRQui killed %7, 0 :: (load (s128), align 4)
+ TBZW %5, 0, %bb.2
+ B %bb.1
+
+ bb.1:
+ %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
+ %9:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %8, implicit $fpcr
+ %10:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %9, implicit $fpcr
+ %11:fpr128 = nofpexcept FADDPv4f32 %10, %10, implicit $fpcr
+ %12:gpr64all = COPY %11.dsub
+ %14:fpr64 = COPY %12
+ %13:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %14, implicit $fpcr
+ $s0 = COPY %13
+ RET_ReallyLR implicit $s0
+
+ bb.2:
+ $q0 = COPY %0
+ $q1 = COPY %2
+ $q2 = COPY %1
+ $q3 = COPY %3
+ TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
+
+...
More information about the llvm-commits
mailing list