[clang] [Clang][RISCV] Handle RVV tuple types correctly as OutputOperands for inline asm (PR #67109)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 22 02:45:42 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-clang
Changes:
This PR is based on #67018. It fixes a compilation issue for RVV tuple types used as InputOperands for inline asm.
---
Currently, for tuple types used as inline asm inputs, the compiler generates IR like https://godbolt.org/z/djebPfqxf, which the back-end cannot code-generate successfully (https://godbolt.org/z/na7T19Krc). This PR changes Clang to generate https://godbolt.org/z/MsovoxbY9 instead, which the back-end handles correctly.
A follow-up PR will correctly handle the interaction between RVV tuple-type InputOperands and OutputOperands.
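To make the change concrete, here is a minimal sketch of the input-operand case (the same pattern as the `baz` test in the diff below; the IR in the comments is simplified from that test's CHECK lines, with illustrative value names):

```c
#include <riscv_vector.h>

void tuple_input(void) {
  vint32m1x2_t v2;
  // With this patch, each element of the tuple becomes its own "vr"
  // input operand instead of one struct-typed operand, roughly:
  //   %e0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %v2, 0
  //   %e1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %v2, 1
  //   call void asm sideeffect "#NOP", "^vr,^vr"(<vscale x 2 x i32> %e0,
  //                                              <vscale x 2 x i32> %e1)
  asm ("#NOP" :: "vr" (v2));
}
```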
---
Full diff: https://github.com/llvm/llvm-project/pull/67109.diff
2 Files Affected:
- (modified) clang/lib/CodeGen/CGStmt.cpp (+101-6)
- (added) clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c (+54)
``````````diff
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 6674aa2409a5947..4a2bdde56c5704e 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -19,6 +19,7 @@
#include "clang/AST/Expr.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/PrettyStackTrace.h"
@@ -29,10 +30,13 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Assumptions.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include <optional>
@@ -2392,6 +2396,26 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
} else if (TruncTy->isVectorTy()) {
Tmp = Builder.CreateBitCast(Tmp, TruncTy);
+ } else if (TruncTy->isStructTy() && ResultRegQualTys[i]->isRVVType()) {
+ auto *STy = cast<llvm::StructType>(TruncTy);
+ auto *VTy = cast<llvm::ScalableVectorType>(STy->getElementType(0));
+
+ assert(STy->containsHomogeneousScalableVectorTypes() &&
+ "Must be dealing with RVV tuple type");
+
+ unsigned MinElts = VTy->getElementCount().getKnownMinValue();
+ llvm::Value *StructValue = llvm::PoisonValue::get(STy);
+
+ for (unsigned Idx = 0, TupleSize = STy->getNumElements();
+ Idx != TupleSize; ++Idx) {
+ llvm::Value *IdxValue =
+ llvm::ConstantInt::get(CGM.Int64Ty, Idx * MinElts);
+ llvm::Value *SubVec = Builder.CreateExtractVector(VTy, Tmp, IdxValue);
+
+ StructValue = Builder.CreateInsertValue(StructValue, SubVec, Idx);
+ }
+
+ Tmp = StructValue;
}
}
@@ -2399,7 +2423,13 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
// ResultTypeRequiresCast elements correspond to the first
// ResultTypeRequiresCast.size() elements of RegResults.
if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) {
- unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
+ unsigned Size;
+ if (ResultRegQualTys[i]->isRVVType() && TruncTy->isStructTy()) {
+ Size = cast<llvm::ScalableVectorType>(
+ cast<llvm::StructType>(TruncTy)->getElementType(0))
+ ->getScalarSizeInBits();
+ } else
+ Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]);
if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {
Builder.CreateStore(Tmp, A);
@@ -2524,11 +2554,32 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
ResultRegIsFlagReg.push_back(IsFlagReg);
llvm::Type *Ty = ConvertTypeForMem(QTy);
+ ResultTruncRegTypes.push_back(Ty);
+
+ // Expressing the type as a structure in inline asm calls would complicate
+ // the current code, so instead the return type is set to be a single
+ // scalable vector, which is then reconstructed with `vector.extract` and
+ // `insertvalue`. The type is derived here, and the reconstruction is done
+ // in EmitAsmStores.
+ if (QTy->isRVVType() && isa<llvm::StructType>(Ty)) {
+ // Flatten the structure into a single ScalableVectorType
+ auto *STy = cast<llvm::StructType>(Ty);
+ assert(STy->containsHomogeneousScalableVectorTypes() &&
+ isa<llvm::ScalableVectorType>(STy->getElementType(0)) &&
+ "Dealing with RVV tuple (aggregate with homogeneous scalable "
+ "vectors");
+
+ auto *VecTy = cast<llvm::ScalableVectorType>(STy->getElementType(0));
+
+ Ty = llvm::ScalableVectorType::get(VecTy->getScalarType(),
+ STy->getNumElements() *
+ VecTy->getMinNumElements());
+ }
+
const bool RequiresCast = Info.allowsRegister() &&
(getTargetHooks().isScalarizableAsmOperand(*this, Ty) ||
Ty->isAggregateType());
- ResultTruncRegTypes.push_back(Ty);
ResultTypeRequiresCast.push_back(RequiresCast);
if (RequiresCast) {
@@ -2551,6 +2602,13 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
QualType InputTy = S.getInputExpr(InputNo)->getType();
QualType OutputType = OutExpr->getType();
+ if ((InputTy->isRVVType() &&
+ isa<llvm::StructType>(ConvertType(InputTy))) ||
+ (OutputType->isRVVType() &&
+ isa<llvm::StructType>(ConvertType(OutputType)))) {
+ llvm_unreachable("FIXME: Deal with RVV type matching.");
+ }
+
uint64_t InputSize = getContext().getTypeSize(InputTy);
if (getContext().getTypeSize(OutputType) < InputSize) {
// Form the asm to return the value as a larger integer or fp type.
@@ -2671,6 +2729,13 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
QualType OutputType = S.getOutputExpr(Output)->getType();
QualType InputTy = InputExpr->getType();
+ if ((InputTy->isRVVType() &&
+ isa<llvm::StructType>(ConvertType(InputTy))) ||
+ (OutputType->isRVVType() &&
+ isa<llvm::StructType>(ConvertType(OutputType)))) {
+ llvm_unreachable("FIXME: Deal with RVV type matching.");
+ }
+
if (getContext().getTypeSize(OutputType) >
getContext().getTypeSize(InputTy)) {
// Use ptrtoint as appropriate so that we can do our extension.
@@ -2701,10 +2766,40 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
std::max((uint64_t)LargestVectorWidth,
VT->getPrimitiveSizeInBits().getKnownMinValue());
- ArgTypes.push_back(Arg->getType());
- ArgElemTypes.push_back(ArgElemType);
- Args.push_back(Arg);
- Constraints += InputConstraint;
+ // Expand RVV tuple type input operands.
+ if (InputExpr->getType()->isRVVType() && Arg->getType()->isStructTy()) {
+ std::string ExpandedInputConstraint;
+
+ auto *STy = cast<llvm::StructType>(Arg->getType());
+
+ assert(STy->containsHomogeneousScalableVectorTypes() &&
+ isa<llvm::ScalableVectorType>(STy->getElementType(0)) &&
+ "Only aggregate type of homogeneous scalable vectors is handled "
+ "here");
+
+ auto *VTy = cast<llvm::ScalableVectorType>(STy->getElementType(0));
+
+ for (unsigned Idx = 0, TupleSize = STy->getNumElements();
+ Idx != TupleSize; ++Idx) {
+ if (!ExpandedInputConstraint.empty())
+ ExpandedInputConstraint += ",";
+
+ ExpandedInputConstraint += InputConstraint;
+ ArgTypes.push_back(VTy);
+ ArgElemTypes.push_back(ArgElemType);
+
+ llvm::Value *SubVec = Builder.CreateExtractValue(Arg, {Idx});
+
+ Args.push_back(SubVec);
+ }
+
+ Constraints += ExpandedInputConstraint;
+ } else {
+ ArgTypes.push_back(Arg->getType());
+ ArgElemTypes.push_back(ArgElemType);
+ Args.push_back(Arg);
+ Constraints += InputConstraint;
+ }
}
// Append the "input" part of inout constraints.
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
new file mode 100644
index 000000000000000..24f403c6625d0aa
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
@@ -0,0 +1,54 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
+#include <riscv_vector.h>
+
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local void @foo(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> asm "#NOP", "=^vr"() #[[ATTR2:[0-9]+]], !srcloc !4
+// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP1]], 0
+// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 2)
+// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], <vscale x 2 x i32> [[TMP3]], 1
+// CHECK-NEXT: ret void
+//
+void foo() {
+ vint32m1x2_t v0;
+ asm ("#NOP" : "=vr" (v0));
+}
+
+// CHECK-LABEL: define dso_local void @bar(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } asm "#NOP", "=^vr,=^vr"() #[[ATTR2]], !srcloc !5
+// CHECK-NEXT: [[ASMRESULT:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT]], i64 0)
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP1]], 0
+// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT]], i64 2)
+// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], <vscale x 2 x i32> [[TMP3]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT1]], i64 0)
+// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP5]], 0
+// CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> [[ASMRESULT1]], i64 2)
+// CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP6]], <vscale x 2 x i32> [[TMP7]], 1
+// CHECK-NEXT: ret void
+//
+void bar() {
+ vint32m1x2_t v0, v2;
+ asm ("#NOP" : "=vr" (v0), "=vr" (v2));
+}
+
+// CHECK-LABEL: define dso_local void @baz(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } undef, 0
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } undef, 1
+// CHECK-NEXT: call void asm sideeffect "#NOP", "^vr,^vr"(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i32> [[TMP1]]) #[[ATTR3:[0-9]+]], !srcloc !6
+// CHECK-NEXT: ret void
+//
+void baz() {
+ vint32m1x2_t v2;
+ asm ("#NOP" :: "vr" (v2));
+}
``````````
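For the output-operand side handled in `EmitAsmStores`, here is a sketch of the resulting lowering (mirroring the `foo` test above; the IR in the comments is simplified from its CHECK lines, with illustrative value names):

```c
#include <riscv_vector.h>

void tuple_output(void) {
  vint32m1x2_t v0;
  // The asm result is returned as one flattened <vscale x 4 x i32>, which
  // Clang then splits back into the tuple's two <vscale x 2 x i32> fields
  // via llvm.vector.extract plus insertvalue, roughly:
  //   %r  = call <vscale x 4 x i32> asm "#NOP", "=^vr"()
  //   %f0 = call <vscale x 2 x i32>
  //           @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %r, i64 0)
  //   %t0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison,
  //           <vscale x 2 x i32> %f0, 0
  //   %f1 = call <vscale x 2 x i32>
  //           @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %r, i64 2)
  //   %t1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %t0,
  //           <vscale x 2 x i32> %f1, 1
  asm ("#NOP" : "=vr" (v0));
}
```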
https://github.com/llvm/llvm-project/pull/67109
More information about the cfe-commits mailing list