[clang] [Clang][RISCV] Handle RVV tuple types correctly as OutputOperands for inline asm (PR #67109)

Yueh-Ting Chen via cfe-commits cfe-commits at lists.llvm.org
Sun Sep 24 23:54:58 PDT 2023


https://github.com/eopXD updated https://github.com/llvm/llvm-project/pull/67109

>From 6f232ccd6dd273b9eecec7f583877a3a5a3696c3 Mon Sep 17 00:00:00 2001
From: eopXD <yueh.ting.chen at gmail.com>
Date: Thu, 21 Sep 2023 06:34:57 -0700
Subject: [PATCH 1/3] [Clang][RISCV] Handle RVV tuple types correctly as
 OutputOperand for inline asm

The RVV tuple type maps to an aggregate type with homogeneous scalable
vectors. EmitAsmStmt does not handle this correctly and this commit
attempts to fix it.

Get pass validation check for homogeneous scalable vector types in
InlineAsm::verify.

Handle RVV tuple types correctly under CGStmt.cpp:EmitAsmStores, since
we can allow direct store for the tuple types.

A follow-up commit will deal with details when associated with
InputOperands.
---
 clang/lib/CodeGen/CGStmt.cpp                  | 38 +++++++++++--------
 .../rvv-inline-asm.c                          | 29 ++++++++++++++
 llvm/lib/IR/InlineAsm.cpp                     | 11 +++++-
 3 files changed, 61 insertions(+), 17 deletions(-)
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 6674aa2409a5947..bb9606178de8a3f 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -29,6 +29,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/Assumptions.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Intrinsics.h"
@@ -2399,22 +2400,27 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
     // ResultTypeRequiresCast elements correspond to the first
     // ResultTypeRequiresCast.size() elements of RegResults.
     if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) {
-      unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
-      Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]);
-      if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {
-        Builder.CreateStore(Tmp, A);
-        continue;
-      }
+      if (ResultRegQualTys[i]->isRVVType() && Tmp->getType()->isStructTy()) {
+        Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]);
+        Dest = CGF.MakeAddrLValue(A, ResultRegQualTys[i]);
+      } else {
+        unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
+        Address A = Dest.getAddress(CGF).withElementType(ResultRegTypes[i]);
+        if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {
+          Builder.CreateStore(Tmp, A);
+          continue;
+        }
 
-      QualType Ty =
-          CGF.getContext().getIntTypeForBitwidth(Size, /*Signed=*/false);
-      if (Ty.isNull()) {
-        const Expr *OutExpr = S.getOutputExpr(i);
-        CGM.getDiags().Report(OutExpr->getExprLoc(),
-                              diag::err_store_value_to_reg);
-        return;
+        QualType Ty =
+            CGF.getContext().getIntTypeForBitwidth(Size, /*Signed=*/false);
+        if (Ty.isNull()) {
+          const Expr *OutExpr = S.getOutputExpr(i);
+          CGM.getDiags().Report(OutExpr->getExprLoc(),
+                                diag::err_store_value_to_reg);
+          return;
+        }
+        Dest = CGF.MakeAddrLValue(A, Ty);
       }
-      Dest = CGF.MakeAddrLValue(A, Ty);
     }
     CGF.EmitStoreThroughLValue(RValue::get(Tmp), Dest);
   }
@@ -2531,7 +2537,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
       ResultTruncRegTypes.push_back(Ty);
       ResultTypeRequiresCast.push_back(RequiresCast);
 
-      if (RequiresCast) {
+      // Allow RVV tuple type (aggregate of homogeneous scalable vector) to be
+      // pushed into return type of inline asm call.
+      if (RequiresCast && !(QTy->isRVVType() && Ty->isStructTy())) {
         unsigned Size = getContext().getTypeSize(QTy);
         Ty = llvm::IntegerType::get(getLLVMContext(), Size);
       }
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
new file mode 100644
index 000000000000000..6d517714179893f
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
@@ -0,0 +1,29 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
+#include <riscv_vector.h>
+
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \
+// RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local void @foo(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32> } asm "#NOP", "=^vr"() #[[ATTR1:[0-9]+]], !srcloc !4
+// CHECK-NEXT:    ret void
+//
+void foo() {
+  vint32m1x2_t v0;
+  asm ("#NOP" : "=vr" (v0));
+}
+
+// CHECK-LABEL: define dso_local void @bar(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call { { <vscale x 2 x i32>, <vscale x 2 x i32> }, { <vscale x 2 x i32>, <vscale x 2 x i32> } } asm "#NOP", "=^vr,=^vr"() #[[ATTR1]], !srcloc !5
+// CHECK-NEXT:    [[ASMRESULT:%.*]] = extractvalue { { <vscale x 2 x i32>, <vscale x 2 x i32> }, { <vscale x 2 x i32>, <vscale x 2 x i32> } } [[TMP0]], 0
+// CHECK-NEXT:    [[ASMRESULT1:%.*]] = extractvalue { { <vscale x 2 x i32>, <vscale x 2 x i32> }, { <vscale x 2 x i32>, <vscale x 2 x i32> } } [[TMP0]], 1
+// CHECK-NEXT:    ret void
+//
+void bar() {
+  vint32m1x2_t v0, v2;
+  asm ("#NOP" : "=vr" (v0), "=vr" (v2));
+}
diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp
index aeaa6a3741b949a..319678cbb8fe164 100644
--- a/llvm/lib/IR/InlineAsm.cpp
+++ b/llvm/lib/IR/InlineAsm.cpp
@@ -321,8 +321,15 @@ Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) {
       return makeStringError("inline asm without outputs must return void");
     break;
   case 1:
-    if (Ty->getReturnType()->isStructTy())
-      return makeStringError("inline asm with one output cannot return struct");
+    if (Ty->getReturnType()->isStructTy()) {
+      // The return type may be a structure if the output operand is from RVV
+      // tuple types. If so the structure must be a structure with homogeneous
+      // scalable vector types.
+      if (!cast<StructType>(Ty->getReturnType())
+               ->containsHomogeneousScalableVectorTypes())
+        return makeStringError(
+            "inline asm with one output cannot return struct");
+    }
     break;
   default:
     StructType *STy = dyn_cast<StructType>(Ty->getReturnType());

>From a825dc6fbf54f85cd5b929cfd808afb2b7729f71 Mon Sep 17 00:00:00 2001
From: eopXD <yueh.ting.chen at gmail.com>
Date: Fri, 22 Sep 2023 02:30:58 -0700
Subject: [PATCH 2/3] [Clang][RISCV] Precommit to show current codegen for
 inline asm of RVV tuple type

The generated LLVM cannot be successfully handled because function
argument of tuple type is an aggregate of scalable vectors. It needs to
be flattened into separate arguments.
---
 .../RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
index 6d517714179893f..3b2f0513b97c777 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
@@ -27,3 +27,14 @@ void bar() {
   vint32m1x2_t v0, v2;
   asm ("#NOP" : "=vr" (v0), "=vr" (v2));
 }
+
+// CHECK-LABEL: define dso_local void @baz(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void asm sideeffect "#NOP", "^vr"({ <vscale x 2 x i32>, <vscale x 2 x i32> } undef) #[[ATTR2:[0-9]+]], !srcloc !6
+// CHECK-NEXT:    ret void
+//
+void baz() {
+  vint32m1x2_t v2;
+  asm ("#NOP" :: "vr" (v2));
+}

>From 46dbc841e526cc4e4b17e6f5e70b6fbb28544856 Mon Sep 17 00:00:00 2001
From: eopXD <yueh.ting.chen at gmail.com>
Date: Fri, 22 Sep 2023 02:32:38 -0700
Subject: [PATCH 3/3] [Clang][RISCV] Handle RVV tuple types correctly as
 InputOperand for inline asm

This commit flatten the input operand from an aggregate structure into
separate arguments for RVV tuple types.
---
 clang/lib/CodeGen/CGStmt.cpp                  | 38 +++++++++++++++++--
 .../rvv-inline-asm.c                          |  4 +-
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index bb9606178de8a3f..a275c458f70b00a 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2709,10 +2709,40 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
           std::max((uint64_t)LargestVectorWidth,
                    VT->getPrimitiveSizeInBits().getKnownMinValue());
 
-    ArgTypes.push_back(Arg->getType());
-    ArgElemTypes.push_back(ArgElemType);
-    Args.push_back(Arg);
-    Constraints += InputConstraint;
+    // Expand RVV tuple type input operands.
+    if (InputExpr->getType()->isRVVType() && Arg->getType()->isStructTy()) {
+      std::string ExpandedInputContraint;
+
+      auto *STy = cast<llvm::StructType>(Arg->getType());
+
+      assert(STy->containsHomogeneousScalableVectorTypes() &&
+             isa<llvm::ScalableVectorType>(STy->getElementType(0)) &&
+             "Only aggregate type of homogeneous scalable vectors is handled "
+             "here");
+
+      auto *VTy = cast<llvm::ScalableVectorType>(STy->getElementType(0));
+
+      for (unsigned Idx = 0, TupleSize = STy->getNumElements();
+           Idx != TupleSize; ++Idx) {
+        if (ExpandedInputContraint.size())
+          ExpandedInputContraint += ",";
+
+        ExpandedInputContraint += InputConstraint;
+        ArgTypes.push_back(VTy);
+        ArgElemTypes.push_back(ArgElemType);
+
+        llvm::Value *SubVec = Builder.CreateExtractValue(Arg, {Idx});
+
+        Args.push_back(SubVec);
+      }
+
+      Constraints += ExpandedInputContraint;
+    } else {
+      ArgTypes.push_back(Arg->getType());
+      ArgElemTypes.push_back(ArgElemType);
+      Args.push_back(Arg);
+      Constraints += InputConstraint;
+    }
   }
 
   // Append the "input" part of inout constraints.
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
index 3b2f0513b97c777..9dc6fb27de5432a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-inline-asm.c
@@ -31,7 +31,9 @@ void bar() {
 // CHECK-LABEL: define dso_local void @baz(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    call void asm sideeffect "#NOP", "^vr"({ <vscale x 2 x i32>, <vscale x 2 x i32> } undef) #[[ATTR2:[0-9]+]], !srcloc !6
+// CHECK-NEXT:    [[TMP0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } undef, 0
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } undef, 1
+// CHECK-NEXT:    call void asm sideeffect "#NOP", "^vr,^vr"(<vscale x 2 x i32> [[TMP0]], <vscale x 2 x i32> [[TMP1]]) #[[ATTR2:[0-9]+]], !srcloc !6
 // CHECK-NEXT:    ret void
 //
 void baz() {



More information about the cfe-commits mailing list