[clang] 9be7b0a - [IRGen][AArch64][RISCV] Generalize bitcast between i1 predicate vector and i8 fixed vector. (#76548)
Author: Craig Topper
Date: 2024-02-13T09:46:50-08:00
New Revision: 9be7b0a539f673081bf8d1d5a5b08135190fd46d
URL: https://github.com/llvm/llvm-project/commit/9be7b0a539f673081bf8d1d5a5b08135190fd46d
DIFF: https://github.com/llvm/llvm-project/commit/9be7b0a539f673081bf8d1d5a5b08135190fd46d.diff
LOG: [IRGen][AArch64][RISCV] Generalize bitcast between i1 predicate vector and i8 fixed vector. (#76548)
Instead of only handling vscale x 16 x i1 predicate vectors, handle any
scalable i1 vector whose known minimum element count is divisible by 8.
This is needed on RISC-V, where predicate types come in multiple sizes.
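For illustration, a minimal C sketch of the kind of conversion this change
affects (not part of the commit; the typedef pattern is taken from the tests
below, and the function names here are mine). With -mrvv-vector-bits=256,
vbool1_t is <vscale x 64 x i1> and its fixed-length counterpart is a 32-byte
value:

  #include <riscv_vector.h>

  // Fixed-length alias of the scalable mask type; __riscv_v_fixed_vlen is
  // defined when -mrvv-vector-bits is in effect.
  typedef vbool1_t fixed_bool1_t
      __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));

  // Both directions of this conversion previously round-tripped the mask
  // through a stack slot; with this patch they lower to
  // llvm.vector.insert/extract plus a bitcast.
  fixed_bool1_t to_fixed(vbool1_t m) { return m; }
  vbool1_t to_scalable(fixed_bool1_t m) { return m; }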
Added:
Modified:
clang/lib/CodeGen/CGCall.cpp
clang/lib/CodeGen/CGExprScalar.cpp
clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c
clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c
clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c
clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index cd26a3df78602c..d05cf1c6e1814e 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1301,27 +1301,25 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
// If coercing a fixed vector to a scalable vector for ABI compatibility, and
// the types match, use the llvm.vector.insert intrinsic to perform the
// conversion.
- if (auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(Ty)) {
- if (auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
- // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+ if (auto *ScalableDstTy = dyn_cast<llvm::ScalableVectorType>(Ty)) {
+ if (auto *FixedSrcTy = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+ // If we are casting a fixed i8 vector to a scalable i1 predicate
// vector, use a vector insert and bitcast the result.
- bool NeedsBitcast = false;
- auto PredType =
- llvm::ScalableVectorType::get(CGF.Builder.getInt1Ty(), 16);
- llvm::Type *OrigType = Ty;
- if (ScalableDst == PredType &&
- FixedSrc->getElementType() == CGF.Builder.getInt8Ty()) {
- ScalableDst = llvm::ScalableVectorType::get(CGF.Builder.getInt8Ty(), 2);
- NeedsBitcast = true;
+ if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
+ ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
+ FixedSrcTy->getElementType()->isIntegerTy(8)) {
+ ScalableDstTy = llvm::ScalableVectorType::get(
+ FixedSrcTy->getElementType(),
+ ScalableDstTy->getElementCount().getKnownMinValue() / 8);
}
- if (ScalableDst->getElementType() == FixedSrc->getElementType()) {
+ if (ScalableDstTy->getElementType() == FixedSrcTy->getElementType()) {
auto *Load = CGF.Builder.CreateLoad(Src);
- auto *UndefVec = llvm::UndefValue::get(ScalableDst);
+ auto *UndefVec = llvm::UndefValue::get(ScalableDstTy);
auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
llvm::Value *Result = CGF.Builder.CreateInsertVector(
- ScalableDst, UndefVec, Load, Zero, "cast.scalable");
- if (NeedsBitcast)
- Result = CGF.Builder.CreateBitCast(Result, OrigType);
+ ScalableDstTy, UndefVec, Load, Zero, "cast.scalable");
+ if (ScalableDstTy != Ty)
+ Result = CGF.Builder.CreateBitCast(Result, Ty);
return Result;
}
}
@@ -3199,13 +3197,14 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
llvm::Value *Coerced = Fn->getArg(FirstIRArg);
if (auto *VecTyFrom =
dyn_cast<llvm::ScalableVectorType>(Coerced->getType())) {
- // If we are casting a scalable 16 x i1 predicate vector to a fixed i8
+ // If we are casting a scalable i1 predicate vector to a fixed i8
// vector, bitcast the source and use a vector extract.
- auto PredType =
- llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
- if (VecTyFrom == PredType &&
+ if (VecTyFrom->getElementType()->isIntegerTy(1) &&
+ VecTyFrom->getElementCount().isKnownMultipleOf(8) &&
VecTyTo->getElementType() == Builder.getInt8Ty()) {
- VecTyFrom = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
+ VecTyFrom = llvm::ScalableVectorType::get(
+ VecTyTo->getElementType(),
+ VecTyFrom->getElementCount().getKnownMinValue() / 8);
Coerced = Builder.CreateBitCast(Coerced, VecTyFrom);
}
if (VecTyFrom->getElementType() == VecTyTo->getElementType()) {
@@ -5877,12 +5876,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// If coercing a fixed vector from a scalable vector for ABI
// compatibility, and the types match, use the llvm.vector.extract
// intrinsic to perform the conversion.
- if (auto *FixedDst = dyn_cast<llvm::FixedVectorType>(RetIRTy)) {
+ if (auto *FixedDstTy = dyn_cast<llvm::FixedVectorType>(RetIRTy)) {
llvm::Value *V = CI;
- if (auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(V->getType())) {
- if (FixedDst->getElementType() == ScalableSrc->getElementType()) {
+ if (auto *ScalableSrcTy =
+ dyn_cast<llvm::ScalableVectorType>(V->getType())) {
+ if (FixedDstTy->getElementType() == ScalableSrcTy->getElementType()) {
llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
- V = Builder.CreateExtractVector(FixedDst, V, Zero, "cast.fixed");
+ V = Builder.CreateExtractVector(FixedDstTy, V, Zero, "cast.fixed");
return RValue::get(V);
}
}
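The CGCall.cpp hunks above all rely on one observation, restated here as a
standalone C++ sketch (the helper name and free-function form are mine, not
clang's; the LLVM API calls match those in the patch). A predicate
<vscale x N x i1> with N a known multiple of 8 occupies the same bits as
<vscale x N/8 x i8>, so a fixed i8 vector can be moved in or out through that
container with a single bitcast:

  #include "llvm/IR/DerivedTypes.h"
  #include <cassert>

  // Map a byte-multiple i1 predicate type to its equally sized i8 container.
  static llvm::ScalableVectorType *
  getI8ContainerType(llvm::ScalableVectorType *PredTy) {
    assert(PredTy->getElementType()->isIntegerTy(1) &&
           PredTy->getElementCount().isKnownMultipleOf(8));
    return llvm::ScalableVectorType::get(
        llvm::Type::getInt8Ty(PredTy->getContext()),
        PredTy->getElementCount().getKnownMinValue() / 8);
  }

For example, <vscale x 64 x i1> (vbool1_t) maps to <vscale x 8 x i8>, which is
exactly the type pair visible in the test updates below.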
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index fa03163bbde577..aa805f291d1757 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2137,26 +2137,24 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
// If Src is a fixed vector and Dst is a scalable vector, and both have the
// same element type, use the llvm.vector.insert intrinsic to perform the
// bitcast.
- if (const auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
- if (const auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(DstTy)) {
- // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+ if (auto *FixedSrcTy = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+ if (auto *ScalableDstTy = dyn_cast<llvm::ScalableVectorType>(DstTy)) {
+ // If we are casting a fixed i8 vector to a scalable i1 predicate
// vector, use a vector insert and bitcast the result.
- bool NeedsBitCast = false;
- auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
- llvm::Type *OrigType = DstTy;
- if (ScalableDst == PredType &&
- FixedSrc->getElementType() == Builder.getInt8Ty()) {
- DstTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
- ScalableDst = cast<llvm::ScalableVectorType>(DstTy);
- NeedsBitCast = true;
+ if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
+ ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
+ FixedSrcTy->getElementType()->isIntegerTy(8)) {
+ ScalableDstTy = llvm::ScalableVectorType::get(
+ FixedSrcTy->getElementType(),
+ ScalableDstTy->getElementCount().getKnownMinValue() / 8);
}
- if (FixedSrc->getElementType() == ScalableDst->getElementType()) {
- llvm::Value *UndefVec = llvm::UndefValue::get(DstTy);
+ if (FixedSrcTy->getElementType() == ScalableDstTy->getElementType()) {
+ llvm::Value *UndefVec = llvm::UndefValue::get(ScalableDstTy);
llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
llvm::Value *Result = Builder.CreateInsertVector(
- DstTy, UndefVec, Src, Zero, "cast.scalable");
- if (NeedsBitCast)
- Result = Builder.CreateBitCast(Result, OrigType);
+ ScalableDstTy, UndefVec, Src, Zero, "cast.scalable");
+ if (Result->getType() != DstTy)
+ Result = Builder.CreateBitCast(Result, DstTy);
return Result;
}
}
@@ -2165,18 +2163,19 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
// If Src is a scalable vector and Dst is a fixed vector, and both have the
// same element type, use the llvm.vector.extract intrinsic to perform the
// bitcast.
- if (const auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
- if (const auto *FixedDst = dyn_cast<llvm::FixedVectorType>(DstTy)) {
- // If we are casting a scalable 16 x i1 predicate vector to a fixed i8
+ if (auto *ScalableSrcTy = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
+ if (auto *FixedDstTy = dyn_cast<llvm::FixedVectorType>(DstTy)) {
+ // If we are casting a scalable i1 predicate vector to a fixed i8
// vector, bitcast the source and use a vector extract.
- auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
- if (ScalableSrc == PredType &&
- FixedDst->getElementType() == Builder.getInt8Ty()) {
- SrcTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
- ScalableSrc = cast<llvm::ScalableVectorType>(SrcTy);
- Src = Builder.CreateBitCast(Src, SrcTy);
+ if (ScalableSrcTy->getElementType()->isIntegerTy(1) &&
+ ScalableSrcTy->getElementCount().isKnownMultipleOf(8) &&
+ FixedDstTy->getElementType()->isIntegerTy(8)) {
+ ScalableSrcTy = llvm::ScalableVectorType::get(
+ FixedDstTy->getElementType(),
+ ScalableSrcTy->getElementCount().getKnownMinValue() / 8);
+ Src = Builder.CreateBitCast(Src, ScalableSrcTy);
}
- if (ScalableSrc->getElementType() == FixedDst->getElementType()) {
+ if (ScalableSrcTy->getElementType() == FixedDstTy->getElementType()) {
llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
return Builder.CreateExtractVector(DstTy, Src, Zero, "cast.fixed");
}
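Both CGExprScalar.cpp hunks then emit the same two-instruction sequence. A
minimal IRBuilder sketch of the fixed-to-scalable direction (assuming the
getI8ContainerType helper sketched earlier; again an illustration, not
clang's code):

  #include "llvm/IR/IRBuilder.h"

  // Fixed <M x i8> -> scalable i1 predicate: insert into the i8 container,
  // then bitcast to the requested predicate type.
  static llvm::Value *emitFixedToPredicate(llvm::IRBuilder<> &B,
                                           llvm::Value *FixedVec,
                                           llvm::ScalableVectorType *PredTy) {
    llvm::ScalableVectorType *ContainerTy = getI8ContainerType(PredTy);
    llvm::Value *Result = B.CreateInsertVector(
        ContainerTy, llvm::UndefValue::get(ContainerTy), FixedVec,
        llvm::ConstantInt::get(B.getInt64Ty(), 0), "cast.scalable");
    return B.CreateBitCast(Result, PredTy);
  }

The reverse direction mirrors it: bitcast the predicate to the i8 container,
then extract the fixed vector (CreateExtractVector in the hunks above).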
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c
index a7b3123e61cd52..20fb4a04564c75 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c
@@ -177,29 +177,26 @@ void write_float64m1(struct struct_float64m1 *s, vfloat64m1_t x) {
// CHECK-64-LABEL: @read_bool1(
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA4]]
-// CHECK-64-NEXT: store <8 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
-// CHECK-64-NEXT: [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
+// CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> undef, <8 x i8> [[TMP0]], i64 0)
+// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
// CHECK-64-NEXT: ret <vscale x 64 x i1> [[TMP1]]
//
// CHECK-128-LABEL: @read_bool1(
// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <16 x i8>, align 16
// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 16
// CHECK-128-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA4]]
-// CHECK-128-NEXT: store <16 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA4]]
-// CHECK-128-NEXT: [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA4]]
+// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> [[TMP0]], i64 0)
+// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
// CHECK-128-NEXT: ret <vscale x 64 x i1> [[TMP1]]
//
// CHECK-256-LABEL: @read_bool1(
// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <32 x i8>, align 32
// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 32
// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[Y]], align 8, !tbaa [[TBAA4]]
-// CHECK-256-NEXT: store <32 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]]
-// CHECK-256-NEXT: [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]]
+// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[TMP0]], i64 0)
+// CHECK-256-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
// CHECK-256-NEXT: ret <vscale x 64 x i1> [[TMP1]]
//
vbool1_t read_bool1(struct struct_bool1 *s) {
@@ -208,29 +205,26 @@ vbool1_t read_bool1(struct struct_bool1 *s) {
// CHECK-64-LABEL: @write_bool1(
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-64-NEXT: store <vscale x 64 x i1> [[X:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
+// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X:%.*]] to <vscale x 8 x i8>
+// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 8
-// CHECK-64-NEXT: store <8 x i8> [[TMP0]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
+// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
// CHECK-64-NEXT: ret void
//
// CHECK-128-LABEL: @write_bool1(
// CHECK-128-NEXT: entry:
-// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 16
-// CHECK-128-NEXT: store <vscale x 64 x i1> [[X:%.*]], ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-128-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[SAVED_VALUE]], align 16, !tbaa [[TBAA4]]
+// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X:%.*]] to <vscale x 8 x i8>
+// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 16
-// CHECK-128-NEXT: store <16 x i8> [[TMP0]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
+// CHECK-128-NEXT: store <16 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
// CHECK-128-NEXT: ret void
//
// CHECK-256-LABEL: @write_bool1(
// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-256-NEXT: store <vscale x 64 x i1> [[X:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[X:%.*]] to <vscale x 8 x i8>
+// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 32
-// CHECK-256-NEXT: store <32 x i8> [[TMP0]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
+// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr [[Y]], align 8, !tbaa [[TBAA4]]
// CHECK-256-NEXT: ret void
//
void write_bool1(struct struct_bool1 *s, vbool1_t x) {
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
index 888abe1a7bc3fb..1824d97d04dda8 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c
@@ -70,13 +70,7 @@ fixed_float64m1_t call_float64_ff(fixed_float64m1_t op1, fixed_float64m1_t op2)
// CHECK-LABEL: @call_bool1_ff(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[OP1_COERCE:%.*]], <vscale x 64 x i1> [[OP2_COERCE:%.*]], i64 256)
-// CHECK-NEXT: store <vscale x 64 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 8, !tbaa [[TBAA4:![0-9]+]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE4]], align 8, !tbaa [[TBAA8:![0-9]+]]
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[TMP0:%.*]], <vscale x 64 x i1> [[TMP1:%.*]], i64 256)
// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP2]]
//
fixed_bool1_t call_bool1_ff(fixed_bool1_t op1, fixed_bool1_t op2) {
@@ -116,14 +110,8 @@ fixed_float64m1_t call_float64_fs(fixed_float64m1_t op1, vfloat64m1_t op2) {
// CHECK-LABEL: @call_bool1_fs(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[OP1_COERCE:%.*]], <vscale x 64 x i1> [[OP2:%.*]], i64 256)
-// CHECK-NEXT: store <vscale x 64 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 8, !tbaa [[TBAA4]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE2]], align 8, !tbaa [[TBAA8]]
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[TMP0:%.*]], <vscale x 64 x i1> [[OP2:%.*]], i64 256)
+// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP1]]
//
fixed_bool1_t call_bool1_fs(fixed_bool1_t op1, vbool1_t op2) {
return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen);
@@ -162,14 +150,8 @@ fixed_float64m1_t call_float64_ss(vfloat64m1_t op1, vfloat64m1_t op2) {
// CHECK-LABEL: @call_bool1_ss(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[OP1:%.*]], <vscale x 64 x i1> [[OP2:%.*]], i64 256)
-// CHECK-NEXT: store <vscale x 64 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
-// CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA8]]
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP2]]
+// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP0]]
//
fixed_bool1_t call_bool1_ss(vbool1_t op1, vbool1_t op2) {
return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen);
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c
index fe278174bf6817..3806c3e1b30bbf 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c
@@ -65,13 +65,7 @@ fixed_float64m1_t from_vfloat64m1_t(vfloat64m1_t type) {
// CHECK-LABEL: @from_vbool1_t(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-NEXT: store <vscale x 64 x i1> [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4:![0-9]+]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA8:![0-9]+]]
-// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP1]]
+// CHECK-NEXT: ret <vscale x 64 x i1> [[TYPE:%.*]]
//
fixed_bool1_t from_vbool1_t(vbool1_t type) {
return type;
@@ -79,7 +73,7 @@ fixed_bool1_t from_vbool1_t(vbool1_t type) {
// CHECK-LABEL: @to_vbool1_t(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 64 x i1> [[TYPE_COERCE:%.*]]
+// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP0:%.*]]
//
vbool1_t to_vbool1_t(fixed_bool1_t type) {
return type;
@@ -105,8 +99,8 @@ vbool4_t to_vbool4_t(fixed_bool4_t type) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-NEXT: store <vscale x 2 x i1> [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA8]]
+// CHECK-NEXT: store <vscale x 2 x i1> [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA8:![0-9]+]]
// CHECK-NEXT: store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1
// CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
// CHECK-NEXT: ret <vscale x 2 x i1> [[TMP1]]
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c
index ac22bdce0da3e5..eb769fadda9a85 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c
@@ -53,25 +53,24 @@ fixed_bool32_t global_bool32;
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca <vscale x 64 x i1>, align 1
// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca <vscale x 64 x i8>, align 1
// CHECK-NEXT: [[MASK:%.*]] = alloca <vscale x 64 x i1>, align 1
-// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <32 x i8>, align 32
// CHECK-NEXT: store <vscale x 64 x i1> [[M:%.*]], ptr [[M_ADDR]], align 1
// CHECK-NEXT: store <vscale x 64 x i8> [[VEC:%.*]], ptr [[VEC_ADDR]], align 1
// CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 64 x i1>, ptr [[M_ADDR]], align 1
// CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @global_bool1, align 8
-// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[SAVED_VALUE]], align 32
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 32
+// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1.i64(<vscale x 64 x i1> [[TMP0]], <vscale x 64 x i1> [[TMP2]], i64 256)
// CHECK-NEXT: store <vscale x 64 x i1> [[TMP3]], ptr [[MASK]], align 1
// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 64 x i1>, ptr [[MASK]], align 1
// CHECK-NEXT: [[TMP5:%.*]] = load <vscale x 64 x i8>, ptr [[VEC_ADDR]], align 1
// CHECK-NEXT: [[TMP6:%.*]] = load <256 x i8>, ptr @global_vec_int8m8, align 8
-// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.v256i8(<vscale x 64 x i8> undef, <256 x i8> [[TMP6]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vadd.mask.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> [[TMP5]], <vscale x 64 x i8> [[CAST_SCALABLE]], <vscale x 64 x i1> [[TMP4]], i64 256, i64 3)
+// CHECK-NEXT: [[CAST_SCALABLE1:%.*]] = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.v256i8(<vscale x 64 x i8> undef, <256 x i8> [[TMP6]], i64 0)
+// CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vadd.mask.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> [[TMP5]], <vscale x 64 x i8> [[CAST_SCALABLE1]], <vscale x 64 x i1> [[TMP4]], i64 256, i64 3)
// CHECK-NEXT: [[CAST_FIXED:%.*]] = call <256 x i8> @llvm.vector.extract.v256i8.nxv64i8(<vscale x 64 x i8> [[TMP7]], i64 0)
// CHECK-NEXT: store <256 x i8> [[CAST_FIXED]], ptr [[RETVAL]], align 8
// CHECK-NEXT: [[TMP8:%.*]] = load <256 x i8>, ptr [[RETVAL]], align 8
-// CHECK-NEXT: [[CAST_SCALABLE1:%.*]] = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.v256i8(<vscale x 64 x i8> undef, <256 x i8> [[TMP8]], i64 0)
-// CHECK-NEXT: ret <vscale x 64 x i8> [[CAST_SCALABLE1]]
+// CHECK-NEXT: [[CAST_SCALABLE2:%.*]] = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.v256i8(<vscale x 64 x i8> undef, <256 x i8> [[TMP8]], i64 0)
+// CHECK-NEXT: ret <vscale x 64 x i8> [[CAST_SCALABLE2]]
//
fixed_int8m8_t test_bool1(vbool1_t m, vint8m8_t vec) {
vbool1_t mask = __riscv_vmand(m, global_bool1, __riscv_v_fixed_vlen);
@@ -181,15 +180,15 @@ fixed_int32m1_t array_arg(fixed_int32m1_t arr[]) {
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <32 x i8>, align 8
// CHECK-NEXT: [[ARR:%.*]] = alloca [3 x <32 x i8>], align 8
// CHECK-NEXT: [[PARR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 64 x i1>, align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <32 x i8>], ptr [[ARR]], i64 0, i64 0
// CHECK-NEXT: store ptr [[ARRAYIDX]], ptr [[PARR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PARR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 8
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[RETVAL]], align 8
-// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 8 [[RETVAL]], i64 32, i1 false)
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 64 x i1>, ptr [[RETVAL_COERCE]], align 8
-// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP2]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
+// CHECK-NEXT: ret <vscale x 64 x i1> [[TMP3]]
//
fixed_bool1_t address_of_array_idx_bool1() {
fixed_bool1_t arr[3];
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c
index d7df1a24bbfb00..31a245dcb22405 100644
--- a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c
@@ -56,18 +56,16 @@ void write_global_i64(vint64m1_t v) { global_i64 = v; }
// CHECK-64-LABEL: @write_global_bool1(
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-64-NEXT: store <vscale x 64 x i1> [[V:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
-// CHECK-64-NEXT: store <8 x i8> [[TMP0]], ptr @global_bool1, align 8, !tbaa [[TBAA4]]
+// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[V:%.*]] to <vscale x 8 x i8>
+// CHECK-64-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
+// CHECK-64-NEXT: store <8 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[TBAA4]]
// CHECK-64-NEXT: ret void
//
// CHECK-256-LABEL: @write_global_bool1(
// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 64 x i1>, align 8
-// CHECK-256-NEXT: store <vscale x 64 x i1> [[V:%.*]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
-// CHECK-256-NEXT: store <32 x i8> [[TMP0]], ptr @global_bool1, align 8, !tbaa [[TBAA4]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i1> [[V:%.*]] to <vscale x 8 x i8>
+// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[TMP0]], i64 0)
+// CHECK-256-NEXT: store <32 x i8> [[CAST_FIXED]], ptr @global_bool1, align 8, !tbaa [[TBAA4]]
// CHECK-256-NEXT: ret void
//
void write_global_bool1(vbool1_t v) { global_bool1 = v; }
@@ -92,7 +90,7 @@ void write_global_bool4(vbool4_t v) { global_bool4 = v; }
// CHECK-256-LABEL: @write_global_bool32(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
-// CHECK-256-NEXT: store <vscale x 2 x i1> [[V:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA9:![0-9]+]]
+// CHECK-256-NEXT: store <vscale x 2 x i1> [[V:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA7:![0-9]+]]
// CHECK-256-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA4]]
// CHECK-256-NEXT: store <1 x i8> [[TMP0]], ptr @global_bool32, align 1, !tbaa [[TBAA4]]
// CHECK-256-NEXT: ret void
@@ -120,18 +118,16 @@ vint64m1_t read_global_i64() { return global_i64; }
// CHECK-64-LABEL: @read_global_bool1(
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @global_bool1, align 8, !tbaa [[TBAA4]]
-// CHECK-64-NEXT: store <8 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
-// CHECK-64-NEXT: [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 8, !tbaa [[TBAA4]]
+// CHECK-64-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> undef, <8 x i8> [[TMP0]], i64 0)
+// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
// CHECK-64-NEXT: ret <vscale x 64 x i1> [[TMP1]]
//
// CHECK-256-LABEL: @read_global_bool1(
// CHECK-256-NEXT: entry:
-// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <32 x i8>, align 32
// CHECK-256-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr @global_bool1, align 8, !tbaa [[TBAA4]]
-// CHECK-256-NEXT: store <32 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]]
-// CHECK-256-NEXT: [[TMP1:%.*]] = load <vscale x 64 x i1>, ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]]
+// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[TMP0]], i64 0)
+// CHECK-256-NEXT: [[TMP1:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
// CHECK-256-NEXT: ret <vscale x 64 x i1> [[TMP1]]
//
vbool1_t read_global_bool1() { return global_bool1; }
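Note what does not change above: in the cast.c and globals.c diffs, vbool32_t
(<vscale x 2 x i1> here) still round-trips through a SAVED_VALUE stack slot,
and only its TBAA metadata numbering shifts. That follows directly from the
guard the patch adds, restated as a sketch (function name is mine):

  // A predicate qualifies for the direct bitcast path only if its known
  // minimum element count is a byte multiple; 2 is not, so vbool32_t
  // keeps the memory round-trip.
  static bool qualifiesForDirectBitcast(llvm::ScalableVectorType *PredTy) {
    return PredTy->getElementType()->isIntegerTy(1) &&
           PredTy->getElementCount().isKnownMultipleOf(8);
  }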