[llvm] [RISCV][GISEL] Legalize G_VSCALE (PR #85967)
Michael Maitland via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 21 06:07:24 PDT 2024
https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/85967
>From 48c7ae53773666c3ce982850666686e5b8bb35d9 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 7 Mar 2024 13:35:22 -0800
Subject: [PATCH 1/2] [RISCV][GISEL] Legalize G_VSCALE
G_VSCALE should be lowered using VLENB.
---
.../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 44 +++++++
.../Target/RISCV/GISel/RISCVLegalizerInfo.h | 1 +
llvm/lib/Target/RISCV/RISCVInstrGISel.td | 8 ++
.../legalizer/rvv/legalize-vscale-rv32.mir | 120 ++++++++++++++++++
.../legalizer/rvv/legalize-vscale-rv64.mir | 120 ++++++++++++++++++
5 files changed, 293 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir
create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv64.mir
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 64ae4e94a8c929..a7829d4819ebd0 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -374,6 +374,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.clampScalar(0, s32, sXLen)
.lowerForCartesianProduct({s32, sXLen, p0}, {p0});
+ getActionDefinitionsBuilder(G_VSCALE).customFor({sXLen});
+
getLegacyLegalizerInfo().computeTables();
}
@@ -495,6 +497,46 @@ bool RISCVLegalizerInfo::shouldBeInConstantPool(APInt APImm,
return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
}
+bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
+ MachineIRBuilder &MIB) const {
+ const LLT XLenTy(STI.getXLenVT());
+ Register Dst = MI.getOperand(0).getReg();
+
+ // We define our scalable vector types for lmul=1 to use a 64-bit known
+ // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
+ // vscale as VLENB / 8.
+ static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
+ if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
+ report_fatal_error("Support for VLEN==32 is incomplete.");
+ // We assume VLENB is a multiple of 8. We manually choose the best shift
+ // here because SimplifyDemandedBits isn't always able to simplify it.
+ uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
+ if (isPowerOf2_64(Val)) {
+ uint64_t Log2 = Log2_64(Val);
+ if (Log2 < 3) {
+ auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
+ MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2));
+ } else if (Log2 > 3) {
+ auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
+ MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3));
+ } else {
+ MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {});
+ }
+ } else if ((Val % 8) == 0) {
+ // If the multiplier is a multiple of 8, scale it down to avoid needing
+ // to shift the VLENB value.
+ auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
+ MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8));
+ } else {
+ auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
+ auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3));
+ MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val));
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool RISCVLegalizerInfo::legalizeCustom(
LegalizerHelper &Helper, MachineInstr &MI,
LostDebugLocObserver &LocObserver) const {
@@ -552,6 +594,8 @@ bool RISCVLegalizerInfo::legalizeCustom(
}
case TargetOpcode::G_VASTART:
return legalizeVAStart(MI, MIRBuilder);
+ case TargetOpcode::G_VSCALE:
+ return legalizeVScale(MI, MIRBuilder);
}
llvm_unreachable("expected switch to return");
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
index 323426034827e4..e2a98c8d2c736c 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
@@ -42,6 +42,7 @@ class RISCVLegalizerInfo : public LegalizerInfo {
GISelChangeObserver &Observer) const;
bool legalizeVAStart(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const;
+ bool legalizeVScale(MachineInstr &MI, MachineIRBuilder &MIB) const;
};
} // end namespace llvm
#endif
diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
index ede8c9809833cc..54e22d6257814a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
@@ -24,3 +24,11 @@ def G_FCLASS : RISCVGenericInstruction {
let hasSideEffects = false;
}
def : GINodeEquiv<G_FCLASS, riscv_fclass>;
+
+// Pseudo equivalent to a RISCVISD::READ_VLENB.
+def G_READ_VLENB : RISCVGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins);
+ let hasSideEffects = false;
+}
+def : GINodeEquiv<G_READ_VLENB, riscv_read_vlenb>;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir
new file mode 100644
index 00000000000000..60fc3c66adec4a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name: test_1
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_1
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 1
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_2
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_2
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 2
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_3
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_3
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: $x11 = COPY [[C1]](s32)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; CHECK-NEXT: $x10 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 3
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_4
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_4
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 4
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_8
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_8
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 8
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_16
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_16
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: $x10 = COPY [[SHL]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 16
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_40
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_40
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s32)
+ ; CHECK-NEXT: $x11 = COPY [[C]](s32)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; CHECK-NEXT: $x10 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 40
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+
+
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv64.mir
new file mode 100644
index 00000000000000..3e140a5ef72a84
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv64.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name: test_1
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_1
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 1
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_2
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_2
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 2
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_3
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_3
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
+ ; CHECK-NEXT: $x11 = COPY [[C1]](s64)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; CHECK-NEXT: $x10 = COPY [[COPY]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 3
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_4
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_4
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 4
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_8
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_8
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 8
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_16
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_16
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[READ_VLENB]], [[C]](s64)
+ ; CHECK-NEXT: $x10 = COPY [[SHL]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 16
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_40
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_40
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s64)
+ ; CHECK-NEXT: $x11 = COPY [[C]](s64)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; CHECK-NEXT: $x10 = COPY [[COPY]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 40
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+
+
>From 46864fc41f2aa5dba16dae7a97c731f439cce9a2 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 21 Mar 2024 06:07:08 -0700
Subject: [PATCH 2/2] !fixup support clampScalar for G_VSCALE; respond to
comments
---
.../CodeGen/GlobalISel/MachineIRBuilder.h | 11 +
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 51 ++++-
.../CodeGen/GlobalISel/MachineIRBuilder.cpp | 7 +
.../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 8 +-
.../legalizer/rvv/legalize-vscale-rv32.mir | 193 ++++++++++++++++--
5 files changed, 252 insertions(+), 18 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index aaa81342845bff..616db4fc6fb6cb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1165,6 +1165,17 @@ class MachineIRBuilder {
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildVScale(const DstOp &Res, const ConstantInt &MinElts);
+ /// Build and insert \p Res = G_VSCALE \p MinElts
+ ///
+ /// G_VSCALE puts the value of the runtime vscale multiplied by \p MinElts
+ /// into \p Res.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res must be a generic virtual register with scalar type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildVScale(const DstOp &Res, const APInt &MinElts);
+
/// Build and insert a G_INTRINSIC instruction.
///
/// There are four different opcodes based on combinations of whether the
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index abe23af00a7890..5fa10b26befe59 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1699,6 +1699,36 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
case TargetOpcode::G_FLDEXP:
case TargetOpcode::G_STRICT_FLDEXP:
return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_VSCALE:
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ const APInt &Val = MI.getOperand(1).getCImm()->getValue();
+ unsigned TotalSize = Ty.getSizeInBits();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts = TotalSize / NarrowSize;
+
+ SmallVector<Register, 4> PartRegs;
+ for (int I = 0; I != NumParts; ++I) {
+ unsigned Offset = I * NarrowSize;
+ auto K =
+ MIRBuilder.buildVScale(NarrowTy, Val.lshr(Offset).trunc(NarrowSize));
+ PartRegs.push_back(K.getReg(0));
+ }
+ LLT LeftoverTy;
+ unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
+ SmallVector<Register, 1> LeftoverRegs;
+ if (LeftoverBits != 0) {
+ LeftoverTy = LLT::scalar(LeftoverBits);
+ auto K = MIRBuilder.buildVScale(
+ LeftoverTy,
+ Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
+ LeftoverRegs.push_back(K.getReg(0));
+ }
+
+ insertParts(MI.getOperand(0).getReg(),
+ Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
}
}
@@ -2966,7 +2996,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_VECREDUCE_FMIN:
case TargetOpcode::G_VECREDUCE_FMAX:
case TargetOpcode::G_VECREDUCE_FMINIMUM:
- case TargetOpcode::G_VECREDUCE_FMAXIMUM:
+ case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -2980,6 +3010,25 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_VSCALE: {
+ MachineOperand &SrcMO = MI.getOperand(1);
+ LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
+ unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
+ MRI.getType(MI.getOperand(0).getReg()));
+ assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
+ ExtOpc == TargetOpcode::G_ANYEXT) &&
+ "Illegal Extend");
+ const APInt &SrcVal = SrcMO.getCImm()->getValue();
+ const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
+ ? SrcVal.sext(WideTy.getSizeInBits())
+ : SrcVal.zext(WideTy.getSizeInBits());
+ Observer.changingInstr(MI);
+ SrcMO.setCImm(ConstantInt::get(Ctx, Val));
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ }
}
static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index f7aaa0f02efcb3..f2665e25d195e9 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -811,6 +811,13 @@ MachineInstrBuilder MachineIRBuilder::buildVScale(const DstOp &Res,
return VScale;
}
+MachineInstrBuilder MachineIRBuilder::buildVScale(const DstOp &Res,
+ const APInt &MinElts) {
+ ConstantInt *CI =
+ ConstantInt::get(getMF().getFunction().getContext(), MinElts);
+ return buildVScale(Res, *CI);
+}
+
static unsigned getIntrinsicOpcode(bool HasSideEffects, bool IsConvergent) {
if (HasSideEffects && IsConvergent)
return TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS;
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index a7829d4819ebd0..75ce617e68acf9 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -374,7 +374,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.clampScalar(0, s32, sXLen)
.lowerForCartesianProduct({s32, sXLen, p0}, {p0});
- getActionDefinitionsBuilder(G_VSCALE).customFor({sXLen});
+ getActionDefinitionsBuilder(G_VSCALE)
+ .clampScalar(0, sXLen, sXLen)
+ .customFor({sXLen});
getLegacyLegalizerInfo().computeTables();
}
@@ -507,7 +509,9 @@ bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
// vscale as VLENB / 8.
static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
- report_fatal_error("Support for VLEN==32 is incomplete.");
+ // Support for VLEN==32 is incomplete.
+ return false;
+
// We assume VLENB is a multiple of 8. We manually choose the best shift
// here because SimplifyDemandedBits isn't always able to simplify it.
uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir
index 60fc3c66adec4a..39634954223622 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir
@@ -2,11 +2,11 @@
# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s
---
-name: test_1
+name: test_1_s32
body: |
bb.0.entry:
- ; CHECK-LABEL: name: test_1
+ ; CHECK-LABEL: name: test_1_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
@@ -17,11 +17,11 @@ body: |
PseudoRET implicit $x10
...
---
-name: test_2
+name: test_2_s32
body: |
bb.0.entry:
- ; CHECK-LABEL: name: test_2
+ ; CHECK-LABEL: name: test_2_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
@@ -32,11 +32,11 @@ body: |
PseudoRET implicit $x10
...
---
-name: test_3
+name: test_3_s32
body: |
bb.0.entry:
- ; CHECK-LABEL: name: test_3
+ ; CHECK-LABEL: name: test_3_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
@@ -54,11 +54,11 @@ body: |
PseudoRET implicit $x10
...
---
-name: test_4
+name: test_4_s32
body: |
bb.0.entry:
- ; CHECK-LABEL: name: test_4
+ ; CHECK-LABEL: name: test_4_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
@@ -69,11 +69,11 @@ body: |
PseudoRET implicit $x10
...
---
-name: test_8
+name: test_8_s32
body: |
bb.0.entry:
- ; CHECK-LABEL: name: test_8
+ ; CHECK-LABEL: name: test_8_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
@@ -82,11 +82,11 @@ body: |
PseudoRET implicit $x10
...
---
-name: test_16
+name: test_16_s32
body: |
bb.0.entry:
- ; CHECK-LABEL: name: test_16
+ ; CHECK-LABEL: name: test_16_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[READ_VLENB]], [[C]](s32)
@@ -97,11 +97,11 @@ body: |
PseudoRET implicit $x10
...
---
-name: test_40
+name: test_40_s32
body: |
bb.0.entry:
- ; CHECK-LABEL: name: test_40
+ ; CHECK-LABEL: name: test_40_s32
; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
@@ -117,4 +117,167 @@ body: |
PseudoRET implicit $x10
...
-
+---
+name: test_1_s64
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: test_1_s64
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB1]](s32)
+ ; CHECK-NEXT: $x11 = COPY [[C1]](s32)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 1
+ %1:_(s32) = G_TRUNC %0
+ $x10 = COPY %1
+ PseudoRET implicit $x10
+...
+---
+name: test_2_s64
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: test_2_s64
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB1]](s32)
+ ; CHECK-NEXT: $x11 = COPY [[C1]](s32)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 2
+ %1:_(s32) = G_TRUNC %0
+ $x10 = COPY %1
+ PseudoRET implicit $x10
+...
+---
+name: test_3_s64
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: test_3_s64
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: $x11 = COPY [[C1]](s32)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; CHECK-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB1]](s32)
+ ; CHECK-NEXT: $x11 = COPY [[C2]](s32)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 3
+ %1:_(s32) = G_TRUNC %0
+ $x10 = COPY %1
+ PseudoRET implicit $x10
+...
+---
+name: test_4_s64
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: test_4_s64
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB1]](s32)
+ ; CHECK-NEXT: $x11 = COPY [[C1]](s32)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 4
+ %1:_(s32) = G_TRUNC %0
+ $x10 = COPY %1
+ PseudoRET implicit $x10
+...
+---
+name: test_8_s64
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: test_8_s64
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB1]](s32)
+ ; CHECK-NEXT: $x11 = COPY [[C]](s32)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 8
+ %1:_(s32) = G_TRUNC %0
+ $x10 = COPY %1
+ PseudoRET implicit $x10
+...
+---
+name: test_16_s64
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: test_16_s64
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB1]](s32)
+ ; CHECK-NEXT: $x11 = COPY [[C1]](s32)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[SHL]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 16
+ %1:_(s32) = G_TRUNC %0
+ $x10 = COPY %1
+ PseudoRET implicit $x10
+...
+---
+name: test_40_s64
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: test_40_s64
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s32)
+ ; CHECK-NEXT: $x11 = COPY [[C]](s32)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; CHECK-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB1]](s32)
+ ; CHECK-NEXT: $x11 = COPY [[C1]](s32)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 40
+ %1:_(s32) = G_TRUNC %0
+ $x10 = COPY %1
+ PseudoRET implicit $x10
+...
More information about the llvm-commits
mailing list