[llvm] [RISC-V] Support fixed vector type for inline asm (#117098) (PR #117100)

Wed Nov 20 18:22:11 PST 2024

https://github.com/zengdage created https://github.com/llvm/llvm-project/pull/117100

1. On the RISC-V target, isTypeLegalForClass does not accept fixed vector types such as v32i8, because those types are not added to RCVTLists in RISCVRegisterInfo.td. We therefore need to use the fixed vector type's container value type to get its RegisterClass.

2. We also need to fix the error that occurs when converting a fixed vector type to a scalable vector type. Taking v32i8 to nxv8i8 as an example, without this patch the type is converted in the following sequence:

'v32i8 -> i256 -> i128 -> i64 -> nxv8i8'

SelectionDAG does not support converting i64 into nxv8i8, so it hits a fatal error. To fix this, I check the two vector types and their RegisterClass: if the part type is a scalable vector, the value type is a fixed-length vector, and both map to the same RegisterClass, INSERT_SUBVECTOR is used to convert v32i8 into nxv8i8.

>From 8279b1d7445f5ba443d89191c4135b3a64f212f8 Mon Sep 17 00:00:00 2001
From: Zhijin Zeng <zhijin.zeng at spacemit.com>
Date: Wed, 20 Nov 2024 20:06:20 +0800
Subject: [PATCH] [RISC-V] Support fixed vector type for inline asm (#117098)

1. On the RISC-V target, isTypeLegalForClass does not accept fixed
vector types such as v32i8, because those types are not added to
RCVTLists in RISCVRegisterInfo.td. We therefore need to use the fixed
vector type's container value type to get its RegisterClass.

2. We also need to fix the error that occurs when converting a fixed
vector type to a scalable vector type. Taking v32i8 to nxv8i8 as an
example, without this patch the type is converted in the following
sequence:

'v32i8 -> i256 -> i128 -> i64 -> nxv8i8'

SelectionDAG does not support converting i64 into nxv8i8, so it hits
a fatal error. To fix this, I check the two vector types and their
RegisterClass: if the part type is a scalable vector, the value type
is a fixed-length vector, and both map to the same RegisterClass,
INSERT_SUBVECTOR is used to convert v32i8 into nxv8i8.
---
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  5 ++
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  3 +
 .../RISCV/inline-asm-fixed-v-constraint.ll    | 72 +++++++++++++++++++
 .../RISCV/rvv/fixed-vectors-inlineasm.ll      | 28 ++++++++
 4 files changed, 108 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/inline-asm-fixed-v-constraint.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-inlineasm.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 9d729d448502d8..d2559f13ba1e66 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -721,6 +721,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                            PartVT.getVectorElementCount());
       SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);
       Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);
+    } else if (PartVT.isScalableVector() && ValueVT.isFixedLengthVector() &&
+               TLI.getRegClassFor(PartVT) ==
+                   TLI.getRegClassFor(ValueVT.getSimpleVT())) {
+      Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
+                        Val, DAG.getVectorIdxConstant(0, DL));
     } else {
       // Don't extract an integer from a float vector. This can happen if the
       // FP type gets softened to integer and then promoted. The promotion
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 976b2478b433e5..0191cbfb898472 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20494,6 +20494,9 @@ std::pair<unsigned, const TargetRegisterClass *>
 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                   StringRef Constraint,
                                                   MVT VT) const {
+  if (Subtarget.useRVVForFixedLengthVectors() && VT.isFixedLengthVector() &&
+      useRVVForFixedLengthVectorVT(VT))
+    VT = getContainerForFixedLengthVector(VT);
   // First, see if this is a constraint that directly corresponds to a RISC-V
   // register class.
   if (Constraint.size() == 1) {
diff --git a/llvm/test/CodeGen/RISCV/inline-asm-fixed-v-constraint.ll b/llvm/test/CodeGen/RISCV/inline-asm-fixed-v-constraint.ll
new file mode 100644
index 00000000000000..f92a3957398c41
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/inline-asm-fixed-v-constraint.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+
+define <16 x i8> @constraint_vr(<16 x i8> %0, <16 x i8> %1) nounwind {
+; RV32I-LABEL: constraint_vr:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    #APP
+; RV32I-NEXT:    vadd.vv v8, v8, v9
+; RV32I-NEXT:    #NO_APP
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: constraint_vr:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    #APP
+; RV64I-NEXT:    vadd.vv v8, v8, v9
+; RV64I-NEXT:    #NO_APP
+; RV64I-NEXT:    ret
+  %a = tail call <16 x i8> asm "vadd.vv $0, $1, $2", "=^vr,^vr,^vr"(
+    <16 x i8> %0, <16 x i8> %1)
+  ret <16 x i8> %a
+}
+
+define <16 x i8> @constraint_vd(<16 x i8> %0, <16 x i8> %1) nounwind {
+; RV32I-LABEL: constraint_vd:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    #APP
+; RV32I-NEXT:    vadd.vv v8, v8, v9
+; RV32I-NEXT:    #NO_APP
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: constraint_vd:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    #APP
+; RV64I-NEXT:    vadd.vv v8, v8, v9
+; RV64I-NEXT:    #NO_APP
+; RV64I-NEXT:    ret
+  %a = tail call <16 x i8> asm "vadd.vv $0, $1, $2", "=^vd,^vr,^vr"(
+    <16 x i8> %0, <16 x i8> %1)
+  ret <16 x i8> %a
+}
+
+define <16 x i1> @constraint_vm(<16 x i1> %0, <16 x i1> %1) nounwind {
+; RV32I-LABEL: constraint_vm:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    vmv1r.v v9, v0
+; RV32I-NEXT:    vmv1r.v v0, v8
+; RV32I-NEXT:    #APP
+; RV32I-NEXT:    vadd.vv v8, v9, v0
+; RV32I-NEXT:    #NO_APP
+; RV32I-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32I-NEXT:    vand.vi v8, v8, 1
+; RV32I-NEXT:    vmsne.vi v0, v8, 0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: constraint_vm:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    vmv1r.v v9, v0
+; RV64I-NEXT:    vmv1r.v v0, v8
+; RV64I-NEXT:    #APP
+; RV64I-NEXT:    vadd.vv v8, v9, v0
+; RV64I-NEXT:    #NO_APP
+; RV64I-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64I-NEXT:    vand.vi v8, v8, 1
+; RV64I-NEXT:    vmsne.vi v0, v8, 0
+; RV64I-NEXT:    ret
+  %a = tail call <16 x i1> asm "vadd.vv $0, $1, $2", "=^vr,^vr,^vm"(
+    <16 x i1> %0, <16 x i1> %1)
+  ret <16 x i1> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-inlineasm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-inlineasm.ll
new file mode 100644
index 00000000000000..fda1ea686a5edc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-inlineasm.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8>, i64 immarg)
+
+declare <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8>, <32 x i8>, i64 immarg)
+
+define <vscale x 8 x i8> @test(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v12, v9
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    vmacc.vv v10, v8, v12
+; CHECK-EMPTY:
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %4 = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> %0, i64 0)
+  %5 = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> %1, i64 0)
+  %6 = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> %2, i64 0)
+  %7 = tail call <32 x i8> asm sideeffect "vmacc.vv $0, $1, $2\0A\09", "=&^vr,^vr,^vr,0,~{memory}"(<32 x i8> %4, <32 x i8> %5, <32 x i8> %6)
+  %8 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> %7, i64 0)
+  ret <vscale x 8 x i8> %8
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}