[llvm] [RISCV] Model integer min max instructions from Zbb execute in late-B ALU in SiFive7 (PR #85131)
Michael Maitland via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 13 15:57:18 PDT 2024
https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/85131
>From 04d9b98d0009b3c5ab241fa40615f65ca569c7ff Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 13 Mar 2024 12:48:33 -0700
Subject: [PATCH 1/2] [RISCV] Add sched classes for Zbb integer min max
instructions
---
llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 8 ++++----
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 5 ++++-
llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 2 ++
llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td | 2 ++
llvm/lib/Target/RISCV/RISCVScheduleZb.td | 4 ++++
5 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index f0f8494dd9a313..a882b208a76889 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -424,13 +424,13 @@ def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh", Commutable=1>,
let Predicates = [HasStdExtZbb] in {
def MIN : ALU_rr<0b0000101, 0b100, "min", Commutable=1>,
- Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+ Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>;
def MINU : ALU_rr<0b0000101, 0b101, "minu", Commutable=1>,
- Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+ Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>;
def MAX : ALU_rr<0b0000101, 0b110, "max", Commutable=1>,
- Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+ Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>;
def MAXU : ALU_rr<0b0000101, 0b111, "maxu", Commutable=1>,
- Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+ Sched<[WriteIMinMax, ReadIMinMax, ReadIMinMax]>;
} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbkb] in {
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 240d170bfcf6f9..82e4508637c44e 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -186,7 +186,7 @@ class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
WriteBEXT, WriteBEXTI,
WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
WriteCPOP, WriteCPOP32,
- WriteREV8, WriteORCB, WriteSFB,
+ WriteREV8, WriteORCB, WriteIMinMax, WriteSFB,
WriteIMul, WriteIMul32,
WriteIDiv, WriteIDiv32,
WriteIRem, WriteIRem32,
@@ -305,6 +305,8 @@ def : WriteRes<WriteCPOP32, [SiFive7PipeB]>;
// orc.b is in the late-B ALU.
def : WriteRes<WriteORCB, [SiFive7PipeB]>;
+def : WriteRes<WriteIMinMax, [SiFive7PipeAB]>;
+
// rev8 is in the late-A and late-B ALUs.
def : WriteRes<WriteREV8, [SiFive7PipeAB]>;
@@ -1041,6 +1043,7 @@ def : SiFive7AnyToGPRBypass<ReadCTZ32>;
def : ReadAdvance<ReadCPOP, 0>;
def : ReadAdvance<ReadCPOP32, 0>;
def : SiFive7AnyToGPRBypass<ReadORCB>;
+def : SiFive7AnyToGPRBypass<ReadIMinMax>;
def : SiFive7AnyToGPRBypass<ReadREV8>;
def : SiFive7AnyToGPRBypass<ReadSHXADD>;
def : SiFive7AnyToGPRBypass<ReadSHXADD32>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
index d02d34a0fb9c58..8ec2e4ff885ebb 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
@@ -109,6 +109,7 @@ def : WriteRes<WriteCTZ, [SiFiveP400IntArith]>;
def : WriteRes<WriteCTZ32, [SiFiveP400IntArith]>;
def : WriteRes<WriteORCB, [SiFiveP400IntArith]>;
+def : WriteRes<WriteIMinMax, [SiFiveP400IntArith]>;
def : WriteRes<WriteREV8, [SiFiveP400IntArith]>;
@@ -349,6 +350,7 @@ def : ReadAdvance<ReadCTZ32, 0>;
def : ReadAdvance<ReadCPOP, 0>;
def : ReadAdvance<ReadCPOP32, 0>;
def : ReadAdvance<ReadORCB, 0>;
+def : ReadAdvance<ReadIMinMax, 0>;
def : ReadAdvance<ReadREV8, 0>;
def : ReadAdvance<ReadSHXADD, 0>;
def : ReadAdvance<ReadSHXADD32, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
index ef491edf3671f8..4fc7b0335af538 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
@@ -85,6 +85,7 @@ def : WriteRes<WriteRotateImm32, [XS2ALU]>;
def : WriteRes<WriteRotateReg, [XS2ALU]>;
def : WriteRes<WriteRotateReg32, [XS2ALU]>;
def : WriteRes<WriteORCB, [XS2ALU]>;
+def : WriteRes<WriteIMinMax, [XS2ALU]>;
def : WriteRes<WriteREV8, [XS2ALU]>;
// Zbkb
@@ -288,6 +289,7 @@ def : ReadAdvance<ReadCTZ32, 0>;
def : ReadAdvance<ReadCPOP, 0>;
def : ReadAdvance<ReadCPOP32, 0>;
def : XS2LoadToALUBypass<ReadORCB>;
+def : XS2LoadToALUBypass<ReadIMinMax>;
def : XS2LoadToALUBypass<ReadREV8>;
// Zbkc
def : ReadAdvance<ReadCLMUL, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleZb.td b/llvm/lib/Target/RISCV/RISCVScheduleZb.td
index 0a16390e505356..93381f439e0dfc 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleZb.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleZb.td
@@ -25,6 +25,7 @@ def WriteCPOP : SchedWrite;
def WriteCPOP32 : SchedWrite;
def WriteREV8 : SchedWrite;
def WriteORCB : SchedWrite;
+def WriteIMinMax : SchedWrite;
// Zbc extension
def WriteCLMUL : SchedWrite; // CLMUL/CLMULR/CLMULH
@@ -63,6 +64,7 @@ def ReadCPOP : SchedRead;
def ReadCPOP32 : SchedRead;
def ReadREV8 : SchedRead;
def ReadORCB : SchedRead;
+def ReadIMinMax : SchedRead;
// Zbc extension
def ReadCLMUL : SchedRead; // CLMUL/CLMULR/CLMULH
@@ -106,6 +108,7 @@ def : WriteRes<WriteCPOP, []>;
def : WriteRes<WriteCPOP32, []>;
def : WriteRes<WriteREV8, []>;
def : WriteRes<WriteORCB, []>;
+def : WriteRes<WriteIMinMax, []>;
def : ReadAdvance<ReadRotateImm, 0>;
def : ReadAdvance<ReadRotateImm32, 0>;
@@ -119,6 +122,7 @@ def : ReadAdvance<ReadCPOP, 0>;
def : ReadAdvance<ReadCPOP32, 0>;
def : ReadAdvance<ReadREV8, 0>;
def : ReadAdvance<ReadORCB, 0>;
+def : ReadAdvance<ReadIMinMax, 0>;
}
}
>From 4c7a681189d52bd4fc94e7cdf52199d48fe9accf Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 13 Mar 2024 12:49:48 -0700
Subject: [PATCH 2/2] [RISCV] Model integer min max instructions from Zbb
execute in late-B ALU
We don't model the early vs late ALU so we just need to remove usage of
SiFivePipeA for these instructions.
---
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 3 ++-
llvm/test/CodeGen/RISCV/machine-combiner.ll | 24 +++++++++----------
.../tools/llvm-mca/RISCV/SiFive7/gpr-bypass.s | 14 +++++------
3 files changed, 21 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 82e4508637c44e..3586d235bdbbb9 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -305,7 +305,8 @@ def : WriteRes<WriteCPOP32, [SiFive7PipeB]>;
// orc.b is in the late-B ALU.
def : WriteRes<WriteORCB, [SiFive7PipeB]>;
-def : WriteRes<WriteIMinMax, [SiFive7PipeAB]>;
+// min/max are in the late-B ALU
+def : WriteRes<WriteIMinMax, [SiFive7PipeB]>;
// rev8 is in the late-A and late-B ALUs.
def : WriteRes<WriteREV8, [SiFive7PipeAB]>;
diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
index cfdefec04600c8..ebf232cc458ba0 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -740,9 +740,9 @@ define i8 @test_reassoc_minu_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_minu_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a3, a3, 255
-; CHECK-NEXT: andi a2, a2, 255
; CHECK-NEXT: andi a1, a1, 255
; CHECK-NEXT: andi a0, a0, 255
+; CHECK-NEXT: andi a2, a2, 255
; CHECK-NEXT: minu a0, a0, a1
; CHECK-NEXT: minu a1, a2, a3
; CHECK-NEXT: minu a0, a0, a1
@@ -757,9 +757,9 @@ define i16 @test_reassoc_minu_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_minu_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: zext.h a3, a3
-; CHECK-NEXT: zext.h a2, a2
; CHECK-NEXT: zext.h a1, a1
; CHECK-NEXT: zext.h a0, a0
+; CHECK-NEXT: zext.h a2, a2
; CHECK-NEXT: minu a0, a0, a1
; CHECK-NEXT: minu a1, a2, a3
; CHECK-NEXT: minu a0, a0, a1
@@ -774,9 +774,9 @@ define i32 @test_reassoc_minu_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_minu_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: sext.w a3, a3
-; CHECK-NEXT: sext.w a2, a2
; CHECK-NEXT: sext.w a1, a1
; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: sext.w a2, a2
; CHECK-NEXT: minu a0, a0, a1
; CHECK-NEXT: minu a1, a2, a3
; CHECK-NEXT: minu a0, a0, a1
@@ -804,9 +804,9 @@ define i8 @test_reassoc_min_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_min_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: sext.b a3, a3
-; CHECK-NEXT: sext.b a2, a2
; CHECK-NEXT: sext.b a1, a1
; CHECK-NEXT: sext.b a0, a0
+; CHECK-NEXT: sext.b a2, a2
; CHECK-NEXT: min a0, a0, a1
; CHECK-NEXT: min a1, a2, a3
; CHECK-NEXT: min a0, a0, a1
@@ -821,9 +821,9 @@ define i16 @test_reassoc_min_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_min_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: sext.h a3, a3
-; CHECK-NEXT: sext.h a2, a2
; CHECK-NEXT: sext.h a1, a1
; CHECK-NEXT: sext.h a0, a0
+; CHECK-NEXT: sext.h a2, a2
; CHECK-NEXT: min a0, a0, a1
; CHECK-NEXT: min a1, a2, a3
; CHECK-NEXT: min a0, a0, a1
@@ -838,9 +838,9 @@ define i32 @test_reassoc_min_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_min_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: sext.w a3, a3
-; CHECK-NEXT: sext.w a2, a2
; CHECK-NEXT: sext.w a1, a1
; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: sext.w a2, a2
; CHECK-NEXT: min a0, a0, a1
; CHECK-NEXT: min a1, a2, a3
; CHECK-NEXT: min a0, a0, a1
@@ -868,9 +868,9 @@ define i8 @test_reassoc_maxu_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_maxu_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a3, a3, 255
-; CHECK-NEXT: andi a2, a2, 255
; CHECK-NEXT: andi a1, a1, 255
; CHECK-NEXT: andi a0, a0, 255
+; CHECK-NEXT: andi a2, a2, 255
; CHECK-NEXT: maxu a0, a0, a1
; CHECK-NEXT: maxu a1, a2, a3
; CHECK-NEXT: maxu a0, a0, a1
@@ -885,9 +885,9 @@ define i16 @test_reassoc_maxu_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_maxu_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: zext.h a3, a3
-; CHECK-NEXT: zext.h a2, a2
; CHECK-NEXT: zext.h a1, a1
; CHECK-NEXT: zext.h a0, a0
+; CHECK-NEXT: zext.h a2, a2
; CHECK-NEXT: maxu a0, a0, a1
; CHECK-NEXT: maxu a1, a2, a3
; CHECK-NEXT: maxu a0, a0, a1
@@ -902,9 +902,9 @@ define i32 @test_reassoc_maxu_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_maxu_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: sext.w a3, a3
-; CHECK-NEXT: sext.w a2, a2
; CHECK-NEXT: sext.w a1, a1
; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: sext.w a2, a2
; CHECK-NEXT: maxu a0, a0, a1
; CHECK-NEXT: maxu a1, a2, a3
; CHECK-NEXT: maxu a0, a0, a1
@@ -932,9 +932,9 @@ define i8 @test_reassoc_max_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_max_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: sext.b a3, a3
-; CHECK-NEXT: sext.b a2, a2
; CHECK-NEXT: sext.b a1, a1
; CHECK-NEXT: sext.b a0, a0
+; CHECK-NEXT: sext.b a2, a2
; CHECK-NEXT: max a0, a0, a1
; CHECK-NEXT: max a1, a2, a3
; CHECK-NEXT: max a0, a0, a1
@@ -949,9 +949,9 @@ define i16 @test_reassoc_max_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_max_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: sext.h a3, a3
-; CHECK-NEXT: sext.h a2, a2
; CHECK-NEXT: sext.h a1, a1
; CHECK-NEXT: sext.h a0, a0
+; CHECK-NEXT: sext.h a2, a2
; CHECK-NEXT: max a0, a0, a1
; CHECK-NEXT: max a1, a2, a3
; CHECK-NEXT: max a0, a0, a1
@@ -966,9 +966,9 @@ define i32 @test_reassoc_max_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_max_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: sext.w a3, a3
-; CHECK-NEXT: sext.w a2, a2
; CHECK-NEXT: sext.w a1, a1
; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: sext.w a2, a2
; CHECK-NEXT: max a0, a0, a1
; CHECK-NEXT: max a1, a2, a3
; CHECK-NEXT: max a0, a0, a1
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFive7/gpr-bypass.s b/llvm/test/tools/llvm-mca/RISCV/SiFive7/gpr-bypass.s
index 892a5d14e8f3a5..03f7de2fe9a4c2 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SiFive7/gpr-bypass.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFive7/gpr-bypass.s
@@ -180,10 +180,10 @@ jr a0
# CHECK-NEXT: 1 3 0.50 sext.b a0, a0
# CHECK-NEXT: 1 3 0.50 sext.h a0, a0
# CHECK-NEXT: 1 3 0.50 zext.h a0, a0
-# CHECK-NEXT: 1 3 0.50 min a0, a0, a0
-# CHECK-NEXT: 1 3 0.50 minu a0, a0, a0
-# CHECK-NEXT: 1 3 0.50 max a0, a0, a0
-# CHECK-NEXT: 1 3 0.50 maxu a0, a0, a0
+# CHECK-NEXT: 1 3 1.00 min a0, a0, a0
+# CHECK-NEXT: 1 3 1.00 minu a0, a0, a0
+# CHECK-NEXT: 1 3 1.00 max a0, a0, a0
+# CHECK-NEXT: 1 3 1.00 maxu a0, a0, a0
# CHECK-NEXT: 1 3 1.00 rol a0, a0, a0
# CHECK-NEXT: 1 3 1.00 ror a0, a0, a0
# CHECK-NEXT: 1 3 1.00 rori a0, a0, 1
@@ -225,7 +225,7 @@ jr a0
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - - 39.00 52.00 - - - -
+# CHECK-NEXT: - - 37.00 54.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -289,9 +289,9 @@ jr a0
# CHECK-NEXT: - - - 1.00 - - - - sext.h a0, a0
# CHECK-NEXT: - - 1.00 - - - - - zext.h a0, a0
# CHECK-NEXT: - - - 1.00 - - - - min a0, a0, a0
-# CHECK-NEXT: - - 1.00 - - - - - minu a0, a0, a0
+# CHECK-NEXT: - - - 1.00 - - - - minu a0, a0, a0
# CHECK-NEXT: - - - 1.00 - - - - max a0, a0, a0
-# CHECK-NEXT: - - 1.00 - - - - - maxu a0, a0, a0
+# CHECK-NEXT: - - - 1.00 - - - - maxu a0, a0, a0
# CHECK-NEXT: - - - 1.00 - - - - rol a0, a0, a0
# CHECK-NEXT: - - - 1.00 - - - - ror a0, a0, a0
# CHECK-NEXT: - - - 1.00 - - - - rori a0, a0, 1
More information about the llvm-commits
mailing list