[llvm] c69d839 - [AArch64][MachineScheduler] Set no side effect for movprfx
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 27 09:20:48 PST 2022
Author: zhongyunde
Date: 2022-12-28T01:18:14+08:00
New Revision: c69d83908a35d48beadb3dd54f6018425e922ad7
URL: https://github.com/llvm/llvm-project/commit/c69d83908a35d48beadb3dd54f6018425e922ad7
DIFF: https://github.com/llvm/llvm-project/commit/c69d83908a35d48beadb3dd54f6018425e922ad7.diff
LOG: [AArch64][MachineScheduler] Set no side effect for movprfx
The movprfx is a vector copy, so it doesn't access memory. Set
hasSideEffects to 0 so that hasUnmodeledSideEffects() no longer returns true
for it, which blocks the machine scheduler from scheduling load/store instructions.
Reviewed By: paulwalker-arm
Differential Revision: https://reviews.llvm.org/D140680
Added:
llvm/test/CodeGen/AArch64/sched-movprfx.ll
Modified:
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index e9d09aa664b2e..7fcc32cb899b8 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -7916,6 +7916,8 @@ class sve_int_bin_cons_misc_0_c<bits<8> opc, string asm, ZPRRegOp zprty>
let Inst{10} = opc{0};
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_cons_misc_0_c_fexpa<string asm, SDPatternOperator op> {
diff --git a/llvm/test/CodeGen/AArch64/sched-movprfx.ll b/llvm/test/CodeGen/AArch64/sched-movprfx.ll
new file mode 100644
index 0000000000000..f625aa6422107
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sched-movprfx.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64 -mcpu=tsv110 -mattr=+sve < %s | FileCheck %s
+
+; Check that the movprfx intrinsic does not prevent load instructions from
+; being scheduled together. As load instructions have long latency, they are
+; expected to be preferentially issued.
+
+
+; NOTE: The unused parameter ensures z0/z1 are free, avoiding an antidependence during scheduling.
+define <vscale x 2 x i64> @and_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64>* %base) {
+; CHECK-LABEL: and_i64_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: abs z0.d, p1/m, z2.d
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %data0 = tail call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %c, i1 0)
+ %data1 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>* %base,
+ i32 1,
+ <vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> undef)
+ %out = add <vscale x 2 x i64> %data0, %data1
+ ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
+declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
index fb060f4dcae53..618c3f3ce8ce9 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
@@ -587,14 +587,14 @@ define void @srem_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_128-NEXT: ptrue p0.s, vl4
; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
; VBITS_GE_128-NEXT: ldp q4, q5, [x1, #32]
-; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
; VBITS_GE_128-NEXT: movprfx z16, z0
; VBITS_GE_128-NEXT: sdiv z16.s, p0/m, z16.s, z4.s
; VBITS_GE_128-NEXT: mls v0.4s, v16.4s, v4.4s
-; VBITS_GE_128-NEXT: movprfx z4, z3
-; VBITS_GE_128-NEXT: sdiv z4.s, p0/m, z4.s, z6.s
; VBITS_GE_128-NEXT: movprfx z16, z1
; VBITS_GE_128-NEXT: sdiv z16.s, p0/m, z16.s, z5.s
+; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
+; VBITS_GE_128-NEXT: movprfx z4, z3
+; VBITS_GE_128-NEXT: sdiv z4.s, p0/m, z4.s, z6.s
; VBITS_GE_128-NEXT: mls v1.4s, v16.4s, v5.4s
; VBITS_GE_128-NEXT: movprfx z5, z2
; VBITS_GE_128-NEXT: sdiv z5.s, p0/m, z5.s, z7.s
@@ -1407,14 +1407,14 @@ define void @urem_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_128-NEXT: ptrue p0.s, vl4
; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
; VBITS_GE_128-NEXT: ldp q4, q5, [x1, #32]
-; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
; VBITS_GE_128-NEXT: movprfx z16, z0
; VBITS_GE_128-NEXT: udiv z16.s, p0/m, z16.s, z4.s
; VBITS_GE_128-NEXT: mls v0.4s, v16.4s, v4.4s
-; VBITS_GE_128-NEXT: movprfx z4, z3
-; VBITS_GE_128-NEXT: udiv z4.s, p0/m, z4.s, z6.s
; VBITS_GE_128-NEXT: movprfx z16, z1
; VBITS_GE_128-NEXT: udiv z16.s, p0/m, z16.s, z5.s
+; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
+; VBITS_GE_128-NEXT: movprfx z4, z3
+; VBITS_GE_128-NEXT: udiv z4.s, p0/m, z4.s, z6.s
; VBITS_GE_128-NEXT: mls v1.4s, v16.4s, v5.4s
; VBITS_GE_128-NEXT: movprfx z5, z2
; VBITS_GE_128-NEXT: udiv z5.s, p0/m, z5.s, z7.s
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index 88b4038c33082..4477139436ece 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -578,8 +578,8 @@ define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) #0 {
; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
; CHECK-NEXT: mov z3.s, z2.s[1]
; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
; CHECK-NEXT: ldp q0, q1, [x0, #64]
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
; CHECK-NEXT: fmov w10, s5
; CHECK-NEXT: mov z5.s, z5.s[1]
@@ -590,18 +590,18 @@ define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) #0 {
; CHECK-NEXT: fmov w8, s4
; CHECK-NEXT: strh w9, [sp, #8]
; CHECK-NEXT: fmov w9, s6
-; CHECK-NEXT: strh w10, [sp, #4]
; CHECK-NEXT: mov z4.s, z4.s[1]
+; CHECK-NEXT: strh w10, [sp, #4]
; CHECK-NEXT: strh w8, [sp]
; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: strh w9, [sp, #14]
; CHECK-NEXT: movprfx z3, z7
; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.d
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
; CHECK-NEXT: strh w8, [sp, #10]
; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: strh w9, [sp, #14]
; CHECK-NEXT: fmov w9, s5
; CHECK-NEXT: fmov w10, s4
; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
@@ -1331,8 +1331,8 @@ define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) #0 {
; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
; CHECK-NEXT: mov z3.s, z2.s[1]
; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
; CHECK-NEXT: ldp q0, q1, [x0, #64]
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
; CHECK-NEXT: fmov w10, s5
; CHECK-NEXT: mov z5.s, z5.s[1]
@@ -1343,18 +1343,18 @@ define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) #0 {
; CHECK-NEXT: fmov w8, s4
; CHECK-NEXT: strh w9, [sp, #8]
; CHECK-NEXT: fmov w9, s6
-; CHECK-NEXT: strh w10, [sp, #4]
; CHECK-NEXT: mov z4.s, z4.s[1]
+; CHECK-NEXT: strh w10, [sp, #4]
; CHECK-NEXT: strh w8, [sp]
; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: strh w9, [sp, #14]
; CHECK-NEXT: movprfx z3, z7
; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.d
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
; CHECK-NEXT: strh w8, [sp, #10]
; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: strh w9, [sp, #14]
; CHECK-NEXT: fmov w9, s5
; CHECK-NEXT: fmov w10, s4
; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
index 6b48ead00dcdb..aed6f5d3c110d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
@@ -319,14 +319,14 @@ define void @bswap_v8i32(ptr %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #24
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: lsr z3.s, p0/m, z3.s, #8
-; CHECK-NEXT: movprfx z4, z1
-; CHECK-NEXT: lsr z4.s, p0/m, z4.s, #24
; CHECK-NEXT: movprfx z5, z1
; CHECK-NEXT: lsr z5.s, p0/m, z5.s, #8
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #24
+; CHECK-NEXT: movprfx z4, z1
+; CHECK-NEXT: lsr z4.s, p0/m, z4.s, #24
; CHECK-NEXT: and z3.s, z3.s, #0xff00
; CHECK-NEXT: and z5.s, z5.s, #0xff00
; CHECK-NEXT: orr z2.d, z3.d, z2.d
@@ -356,10 +356,10 @@ define <1 x i64> @bswap_v1i64(<1 x i64> %op) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #40
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #24
; CHECK-NEXT: movprfx z4, z0
@@ -396,10 +396,10 @@ define <2 x i64> @bswap_v2i64(<2 x i64> %op) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #40
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #24
; CHECK-NEXT: movprfx z4, z0
@@ -436,14 +436,14 @@ define void @bswap_v4i64(ptr %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #56
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #40
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #24
; CHECK-NEXT: movprfx z5, z0
; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #8
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #56
; CHECK-NEXT: and z3.d, z3.d, #0xff00
; CHECK-NEXT: and z4.d, z4.d, #0xff0000
; CHECK-NEXT: and z5.d, z5.d, #0xff000000
@@ -451,8 +451,6 @@ define void @bswap_v4i64(ptr %a) #0 {
; CHECK-NEXT: orr z3.d, z5.d, z4.d
; CHECK-NEXT: mov z6.d, z0.d
; CHECK-NEXT: mov z7.d, z0.d
-; CHECK-NEXT: movprfx z16, z0
-; CHECK-NEXT: lsl z16.d, p0/m, z16.d, #56
; CHECK-NEXT: orr z2.d, z3.d, z2.d
; CHECK-NEXT: and z6.d, z6.d, #0xff000000
; CHECK-NEXT: and z7.d, z7.d, #0xff0000
@@ -463,6 +461,8 @@ define void @bswap_v4i64(ptr %a) #0 {
; CHECK-NEXT: orr z3.d, z4.d, z3.d
; CHECK-NEXT: movprfx z4, z1
; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #40
+; CHECK-NEXT: movprfx z16, z0
+; CHECK-NEXT: lsl z16.d, p0/m, z16.d, #56
; CHECK-NEXT: and z0.d, z0.d, #0xff00
; CHECK-NEXT: movprfx z5, z1
; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #56
diff --git a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
index abc1ad4cdafcb..36d477738ff86 100644
--- a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
@@ -3928,7 +3928,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 4 1.00 mov z21.s, p15/m, #-32768
# CHECK-NEXT: 1 4 0.50 mov z31.b, p15/m, z31.b
# CHECK-NEXT: 1 6 1.00 U mov z31.b, p7/m, b31
-# CHECK-NEXT: 1 1 0.17 U movprfx z31, z6
+# CHECK-NEXT: 1 1 0.17 movprfx z31, z6
# CHECK-NEXT: 1 8 1.00 mov z31.b, p7/m, wsp
# CHECK-NEXT: 1 6 1.00 mov z31.b, wsp
# CHECK-NEXT: 1 4 1.00 mov z31.b, z31.b[63]
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
index f1d43e2a88ccf..5891350b78022 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
@@ -5030,7 +5030,7 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 1 2 0.50 mov z21.s, p15/m, #-32768
# CHECK-NEXT: 1 2 0.50 mov z31.b, p15/m, z31.b
# CHECK-NEXT: 1 2 0.50 U mov z31.b, p7/m, b31
-# CHECK-NEXT: 1 2 0.50 U movprfx z31, z6
+# CHECK-NEXT: 1 2 0.50 movprfx z31, z6
# CHECK-NEXT: 2 5 1.00 mov z31.b, p7/m, wsp
# CHECK-NEXT: 1 3 3.00 mov z31.b, wsp
# CHECK-NEXT: 1 2 0.50 mov z31.b, z31.b[63]
More information about the llvm-commits
mailing list