[clang] Constexpr for __builtin_shufflevector and __builtin_convertvector (PR #76615)

Sat Apr 20 03:22:08 PDT 2024

Pol Marcet Sardà <polmarcetsarda at gmail.com>,
Pol Marcet Sardà <polmarcetsarda at gmail.com>,
Pol Marcet Sardà <polmarcetsarda at gmail.com>,
Pol Marcet Sardà <polmarcetsarda at gmail.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/76615 at github.com>


https://github.com/Destroyerrrocket updated https://github.com/llvm/llvm-project/pull/76615

>From 7447038a5006ff5fe5fdeead865287930843d8f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pol=20Marcet=20Sard=C3=A0?= <polmarcetsarda at gmail.com>
Date: Sat, 30 Dec 2023 13:59:00 +0100
Subject: [PATCH 1/5] [clang] Constexpr for __builtin_shufflevector and
 __builtin_convertvector

Summary:

This patch adds constexpr support for __builtin_shufflevector
and __builtin_convertvector.

A small oddity encountered was that the argument to the intrinsics may be
an lvalue without an implicit cast of any kind. I solved this through the
EvaluateVectorOrLValue function, which treats the lvalue as if it were in
an rvalue cast, which yields the desired vector.
---
 clang/docs/LanguageExtensions.rst     |   5 +-
 clang/docs/ReleaseNotes.rst           |   3 +
 clang/lib/AST/ExprConstant.cpp        | 138 +++++++++++++++++++++++++-
 clang/test/Sema/constant-builtins-2.c |  61 ++++++++++++
 4 files changed, 204 insertions(+), 3 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 7b23e4d1c2f30c..485e4af69b3e02 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -2925,7 +2925,7 @@ Query for this feature with ``__has_builtin(__builtin_dump_struct)``
 ``__builtin_shufflevector`` is used to express generic vector
 permutation/shuffle/swizzle operations.  This builtin is also very important
 for the implementation of various target-specific header files like
-``<xmmintrin.h>``.
+``<xmmintrin.h>``. This builtin can be used within constant expressions.
 
 **Syntax**:
 
@@ -2979,7 +2979,8 @@ Query for this feature with ``__has_builtin(__builtin_shufflevector)``.
 
 ``__builtin_convertvector`` is used to express generic vector
 type-conversion operations. The input vector and the output vector
-type must have the same number of elements.
+type must have the same number of elements. This builtin can be used within
+constant expressions.
 
 **Syntax**:
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 76eaf0bf11c303..a3c96e44eef4dd 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -193,6 +193,9 @@ Non-comprehensive list of changes in this release
   with support for any unsigned integer type. Like the previous builtins, these
   new builtins are constexpr and may be used in constant expressions.
 
+- Builtins ``__builtin_shufflevector()`` and ``__builtin_convertvector()`` may
+  now be used within constant expressions.
+
 New Compiler Flags
 ------------------
 
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index dae8f32fc02951..6b43b6a1638fbc 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -2706,7 +2706,8 @@ static bool checkFloatingPointResult(EvalInfo &Info, const Expr *E,
 static bool HandleFloatToFloatCast(EvalInfo &Info, const Expr *E,
                                    QualType SrcType, QualType DestType,
                                    APFloat &Result) {
-  assert(isa<CastExpr>(E) || isa<CompoundAssignOperator>(E));
+  assert(isa<CastExpr>(E) || isa<CompoundAssignOperator>(E) ||
+         isa<ConvertVectorExpr>(E));
   llvm::RoundingMode RM = getActiveRoundingMode(Info, E);
   APFloat::opStatus St;
   APFloat Value = Result;
@@ -10709,6 +10710,9 @@ namespace {
     bool VisitUnaryImag(const UnaryOperator *E);
     bool VisitBinaryOperator(const BinaryOperator *E);
     bool VisitUnaryOperator(const UnaryOperator *E);
+    bool VisitConvertVectorExpr(const ConvertVectorExpr *E);
+    bool VisitShuffleVectorExpr(const ShuffleVectorExpr *E);
+
     // FIXME: Missing: conditional operator (for GNU
     //                 conditional select), shufflevector, ExtVectorElementExpr
   };
@@ -10961,6 +10965,138 @@ bool VectorExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) {
   return Success(APValue(ResultElements.data(), ResultElements.size()), E);
 }
 
+static bool EvaluateVectorOrLValue(APValue &Result, EvalInfo &Info,
+                                   const Expr *E, const QualType &Type) {
+  if (!Evaluate(Result, Info, E))
+    return false;
+
+  if (Result.isLValue()) {
+    // Source of the data is an lvalue; Manually handle the lvalue as if
+    // it was an rvalue to get the current APValue.
+    LValue LValueFound;
+    LValueFound.setFrom(Info.Ctx, Result);
+    if (!handleLValueToRValueConversion(Info, E, Type, LValueFound, Result)) {
+      return false;
+    }
+  }
+
+  if (!Result.isVector()) {
+    return false;
+  }
+  return true;
+}
+
+static bool handleVectorConversion(EvalInfo &Info, const FPOptions FPO,
+                                   const Expr *E, QualType SourceTy,
+                                   QualType DestTy, APValue const &Original,
+                                   APValue &Result) {
+  if (SourceTy->isIntegerType()) {
+    if (DestTy->isRealFloatingType()) {
+      Result = APValue(APFloat(0.0));
+      return HandleIntToFloatCast(Info, E, FPO, SourceTy, Original.getInt(),
+                                  DestTy, Result.getFloat());
+    }
+    if (DestTy->isIntegerType()) {
+      Result = APValue(
+          HandleIntToIntCast(Info, E, DestTy, SourceTy, Original.getInt()));
+      return true;
+    }
+  } else if (SourceTy->isRealFloatingType()) {
+    if (DestTy->isRealFloatingType()) {
+      Result = Original;
+      return HandleFloatToFloatCast(Info, E, SourceTy, DestTy,
+                                    Result.getFloat());
+    }
+    if (DestTy->isIntegerType()) {
+      Result = APValue(APSInt());
+      return HandleFloatToIntCast(Info, E, SourceTy, Original.getFloat(),
+                                  DestTy, Result.getInt());
+    }
+  }
+  return false;
+}
+
+bool VectorExprEvaluator::VisitConvertVectorExpr(const ConvertVectorExpr *E) {
+  APValue Source;
+  QualType SourceVecType = E->getSrcExpr()->getType();
+  if (!EvaluateVectorOrLValue(Source, Info, E->getSrcExpr(), SourceVecType))
+    return false;
+
+  QualType DestTy = E->getType()->castAs<VectorType>()->getElementType();
+  QualType SourceTy = SourceVecType->castAs<VectorType>()->getElementType();
+
+  const FPOptions FPO = E->getFPFeaturesInEffect(Info.Ctx.getLangOpts());
+
+  SmallVector<APValue, 4> ResultElements;
+  ResultElements.reserve(Source.getVectorLength());
+  for (unsigned EltNum = 0; EltNum < Source.getVectorLength(); ++EltNum) {
+    APValue Elt;
+    if (!handleVectorConversion(Info, FPO, E, SourceTy, DestTy,
+                                Source.getVectorElt(EltNum), Elt))
+      return false;
+    ResultElements.push_back(std::move(Elt));
+  }
+
+  return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+}
+
+static bool handleVectorShuffle(EvalInfo &Info, const ShuffleVectorExpr *E,
+                                QualType ElemType, APValue const &VecVal1,
+                                APValue const &VecVal2, unsigned EltNum,
+                                APValue &Result) {
+  unsigned const TotalElementsInAVector = VecVal1.getVectorLength();
+
+  Expr const *IndexExpr = E->getExpr(2 + EltNum);
+  APSInt IndexVal;
+  if (!EvaluateInteger(IndexExpr, IndexVal, Info)) {
+    return false;
+  }
+
+  uint32_t index = IndexVal.getZExtValue();
+  // The spec says that -1 should be treated as undef for optimizations,
+  // but in constexpr we need to choose a value. We'll choose 0.
+  if (index >= TotalElementsInAVector * 2) {
+    index = 0;
+  }
+
+  if (index >= TotalElementsInAVector) {
+    Result = VecVal2.getVectorElt(index - TotalElementsInAVector);
+  } else {
+    Result = VecVal1.getVectorElt(index);
+  }
+  return true;
+}
+
+bool VectorExprEvaluator::VisitShuffleVectorExpr(const ShuffleVectorExpr *E) {
+  APValue VecVal1;
+  const Expr *Vec1 = E->getExpr(0);
+  if (!EvaluateVectorOrLValue(VecVal1, Info, Vec1, Vec1->getType()))
+    return false;
+  APValue VecVal2;
+  const Expr *Vec2 = E->getExpr(1);
+  if (!EvaluateVectorOrLValue(VecVal2, Info, Vec2, Vec2->getType()))
+    return false;
+
+  VectorType const *DestVecTy = E->getType()->castAs<VectorType>();
+  if (!DestVecTy) {
+    return false;
+  }
+  QualType DestElTy = DestVecTy->getElementType();
+
+  auto TotalElementsInOutputVector = DestVecTy->getNumElements();
+
+  SmallVector<APValue, 4> ResultElements;
+  ResultElements.reserve(TotalElementsInOutputVector);
+  for (unsigned EltNum = 0; EltNum < TotalElementsInOutputVector; ++EltNum) {
+    APValue Elt;
+    if (!handleVectorShuffle(Info, E, DestElTy, VecVal1, VecVal2, EltNum, Elt))
+      return false;
+    ResultElements.push_back(std::move(Elt));
+  }
+
+  return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+}
+
 //===----------------------------------------------------------------------===//
 // Array Evaluation
 //===----------------------------------------------------------------------===//
diff --git a/clang/test/Sema/constant-builtins-2.c b/clang/test/Sema/constant-builtins-2.c
index a60a1f16a45874..e6ea0cff94e7e1 100644
--- a/clang/test/Sema/constant-builtins-2.c
+++ b/clang/test/Sema/constant-builtins-2.c
@@ -427,3 +427,64 @@ extern __typeof__(__builtin_expect(0, 0)) bi0;
 // Strings
 int array1[__builtin_strlen("ab\0cd")];
 int array2[(sizeof(array1)/sizeof(int)) == 2? 1 : -1];
+
+typedef double vector4double __attribute__((__vector_size__(32)));
+typedef float vector4float __attribute__((__vector_size__(16)));
+typedef long long vector4long __attribute__((__vector_size__(32)));
+typedef int vector4int __attribute__((__vector_size__(16)));
+typedef short vector4short __attribute__((__vector_size__(8)));
+typedef char vector4char __attribute__((__vector_size__(4)));
+typedef double vector8double __attribute__((__vector_size__(64)));
+typedef float vector8float __attribute__((__vector_size__(32)));
+typedef long long vector8long __attribute__((__vector_size__(64)));
+typedef int vector8int __attribute__((__vector_size__(32)));
+typedef short vector8short __attribute__((__vector_size__(16)));
+typedef char vector8char __attribute__((__vector_size__(8)));
+
+// Convert vector
+#define CHECK_NUM(__size, __typeFrom, __typeTo, ...)                            \
+  vector##__size##__typeTo                                                      \
+      from_##vector##__size##__typeFrom##_to_##vector##__size##__typeTo##_var = \
+          __builtin_convertvector((vector##__size##__typeFrom){__VA_ARGS__},    \
+                                  vector##__size##__typeTo);
+#define CHECK_TO_ALL_TYPES(__size, __typeFrom, ...)                            \
+  CHECK_NUM(__size, __typeFrom, double, __VA_ARGS__)                           \
+  CHECK_NUM(__size, __typeFrom, float, __VA_ARGS__)                            \
+  CHECK_NUM(__size, __typeFrom, long, __VA_ARGS__)                             \
+  CHECK_NUM(__size, __typeFrom, int, __VA_ARGS__)                              \
+  CHECK_NUM(__size, __typeFrom, short, __VA_ARGS__)                            \
+  CHECK_NUM(__size, __typeFrom, char, __VA_ARGS__)
+
+#define CHECK_ALL_COMBINATIONS(__size, ...)                                    \
+  CHECK_TO_ALL_TYPES(__size, double, __VA_ARGS__)                              \
+  CHECK_TO_ALL_TYPES(__size, float, __VA_ARGS__)                               \
+  CHECK_TO_ALL_TYPES(__size, long, __VA_ARGS__)                                \
+  CHECK_TO_ALL_TYPES(__size, int, __VA_ARGS__)                                 \
+  CHECK_TO_ALL_TYPES(__size, short, __VA_ARGS__)                               \
+  CHECK_TO_ALL_TYPES(__size, char, __VA_ARGS__)
+
+CHECK_ALL_COMBINATIONS(4, 0, 1, 2, 3);
+CHECK_ALL_COMBINATIONS(8, 0, 1, 2, 3, 4, 5, 6, 7);
+#undef CHECK_ALL_COMBINATIONS
+#undef CHECK_TO_ALL_TYPES
+#undef CHECK_NUM
+
+// Shuffle vector
+vector4int const vector4intConst1 = {0, 1, 2, 3};
+vector4int const vector4intConst2 = {4, 5, 6, 7};
+vector8int const vector8intConst = {};
+
+vector4int vectorShuffle1 =
+    __builtin_shufflevector(vector4intConst1, vector4intConst2, 0, 1, 2, 3);
+vector4int vectorShuffle2 =
+    __builtin_shufflevector(vector4intConst1, vector4intConst2, 4, 5, 6, 7);
+vector4int vectorShuffle3 =
+    __builtin_shufflevector(vector4intConst1, vector4intConst2, -1, -1, -1, -1);
+vector4int vectorShuffle4 =
+    __builtin_shufflevector(vector4intConst1, vector4intConst2, 0, 2, 4, 6);
+vector8int vectorShuffle5 = __builtin_shufflevector(
+    vector8intConst, vector8intConst, 0, 2, 4, 6, 8, 10, 12, 14);
+vector4int vectorShuffle6 = __builtin_shufflevector(
+    vector8intConst, vector8intConst, 0, 2, 4, 6);
+vector8int vectorShuffle7 =
+    __builtin_shufflevector(vector4intConst1, vector4intConst2, 0, 2, 4, 6, 1, 3, 5, 7);

>From ba489b7b476139eb7f79de403ade7ff6648e292e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pol=20Marcet=20Sard=C3=A0?= <polmarcetsarda at gmail.com>
Date: Thu, 4 Jan 2024 11:26:08 +0100
Subject: [PATCH 2/5] Address suggestions from RKSimon

---
 clang/lib/AST/ExprConstant.cpp              | 28 +++---
 clang/test/Sema/constant-builtins-2.c       | 61 -------------
 clang/test/Sema/constat_builtins_vector.cpp | 99 +++++++++++++++++++++
 3 files changed, 110 insertions(+), 78 deletions(-)
 create mode 100644 clang/test/Sema/constat_builtins_vector.cpp

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 6b43b6a1638fbc..0ff015acb2d202 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -10975,15 +10975,11 @@ static bool EvaluateVectorOrLValue(APValue &Result, EvalInfo &Info,
     // it was an rvalue to get the current APValue.
     LValue LValueFound;
     LValueFound.setFrom(Info.Ctx, Result);
-    if (!handleLValueToRValueConversion(Info, E, Type, LValueFound, Result)) {
+    if (!handleLValueToRValueConversion(Info, E, Type, LValueFound, Result))
       return false;
-    }
   }
 
-  if (!Result.isVector()) {
-    return false;
-  }
-  return true;
+  return Result.isVector();
 }
 
 static bool handleVectorConversion(EvalInfo &Info, const FPOptions FPO,
@@ -11027,9 +11023,10 @@ bool VectorExprEvaluator::VisitConvertVectorExpr(const ConvertVectorExpr *E) {
 
   const FPOptions FPO = E->getFPFeaturesInEffect(Info.Ctx.getLangOpts());
 
+  auto SourceLen = Source.getVectorLength();
   SmallVector<APValue, 4> ResultElements;
-  ResultElements.reserve(Source.getVectorLength());
-  for (unsigned EltNum = 0; EltNum < Source.getVectorLength(); ++EltNum) {
+  ResultElements.reserve(SourceLen);
+  for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
     APValue Elt;
     if (!handleVectorConversion(Info, FPO, E, SourceTy, DestTy,
                                 Source.getVectorElt(EltNum), Elt))
@@ -11048,22 +11045,19 @@ static bool handleVectorShuffle(EvalInfo &Info, const ShuffleVectorExpr *E,
 
   Expr const *IndexExpr = E->getExpr(2 + EltNum);
   APSInt IndexVal;
-  if (!EvaluateInteger(IndexExpr, IndexVal, Info)) {
+  if (!EvaluateInteger(IndexExpr, IndexVal, Info))
     return false;
-  }
 
   uint32_t index = IndexVal.getZExtValue();
   // The spec says that -1 should be treated as undef for optimizations,
   // but in constexpr we need to choose a value. We'll choose 0.
-  if (index >= TotalElementsInAVector * 2) {
+  if (index >= TotalElementsInAVector * 2)
     index = 0;
-  }
 
-  if (index >= TotalElementsInAVector) {
+  if (index >= TotalElementsInAVector)
     Result = VecVal2.getVectorElt(index - TotalElementsInAVector);
-  } else {
+  else
     Result = VecVal1.getVectorElt(index);
-  }
   return true;
 }
 
@@ -11078,9 +11072,9 @@ bool VectorExprEvaluator::VisitShuffleVectorExpr(const ShuffleVectorExpr *E) {
     return false;
 
   VectorType const *DestVecTy = E->getType()->castAs<VectorType>();
-  if (!DestVecTy) {
+  if (!DestVecTy)
     return false;
-  }
+
   QualType DestElTy = DestVecTy->getElementType();
 
   auto TotalElementsInOutputVector = DestVecTy->getNumElements();
diff --git a/clang/test/Sema/constant-builtins-2.c b/clang/test/Sema/constant-builtins-2.c
index e6ea0cff94e7e1..a60a1f16a45874 100644
--- a/clang/test/Sema/constant-builtins-2.c
+++ b/clang/test/Sema/constant-builtins-2.c
@@ -427,64 +427,3 @@ extern __typeof__(__builtin_expect(0, 0)) bi0;
 // Strings
 int array1[__builtin_strlen("ab\0cd")];
 int array2[(sizeof(array1)/sizeof(int)) == 2? 1 : -1];
-
-typedef double vector4double __attribute__((__vector_size__(32)));
-typedef float vector4float __attribute__((__vector_size__(16)));
-typedef long long vector4long __attribute__((__vector_size__(32)));
-typedef int vector4int __attribute__((__vector_size__(16)));
-typedef short vector4short __attribute__((__vector_size__(8)));
-typedef char vector4char __attribute__((__vector_size__(4)));
-typedef double vector8double __attribute__((__vector_size__(64)));
-typedef float vector8float __attribute__((__vector_size__(32)));
-typedef long long vector8long __attribute__((__vector_size__(64)));
-typedef int vector8int __attribute__((__vector_size__(32)));
-typedef short vector8short __attribute__((__vector_size__(16)));
-typedef char vector8char __attribute__((__vector_size__(8)));
-
-// Convert vector
-#define CHECK_NUM(__size, __typeFrom, __typeTo, ...)                            \
-  vector##__size##__typeTo                                                      \
-      from_##vector##__size##__typeFrom##_to_##vector##__size##__typeTo##_var = \
-          __builtin_convertvector((vector##__size##__typeFrom){__VA_ARGS__},    \
-                                  vector##__size##__typeTo);
-#define CHECK_TO_ALL_TYPES(__size, __typeFrom, ...)                            \
-  CHECK_NUM(__size, __typeFrom, double, __VA_ARGS__)                           \
-  CHECK_NUM(__size, __typeFrom, float, __VA_ARGS__)                            \
-  CHECK_NUM(__size, __typeFrom, long, __VA_ARGS__)                             \
-  CHECK_NUM(__size, __typeFrom, int, __VA_ARGS__)                              \
-  CHECK_NUM(__size, __typeFrom, short, __VA_ARGS__)                            \
-  CHECK_NUM(__size, __typeFrom, char, __VA_ARGS__)
-
-#define CHECK_ALL_COMBINATIONS(__size, ...)                                    \
-  CHECK_TO_ALL_TYPES(__size, double, __VA_ARGS__)                              \
-  CHECK_TO_ALL_TYPES(__size, float, __VA_ARGS__)                               \
-  CHECK_TO_ALL_TYPES(__size, long, __VA_ARGS__)                                \
-  CHECK_TO_ALL_TYPES(__size, int, __VA_ARGS__)                                 \
-  CHECK_TO_ALL_TYPES(__size, short, __VA_ARGS__)                               \
-  CHECK_TO_ALL_TYPES(__size, char, __VA_ARGS__)
-
-CHECK_ALL_COMBINATIONS(4, 0, 1, 2, 3);
-CHECK_ALL_COMBINATIONS(8, 0, 1, 2, 3, 4, 5, 6, 7);
-#undef CHECK_ALL_COMBINATIONS
-#undef CHECK_TO_ALL_TYPES
-#undef CHECK_NUM
-
-// Shuffle vector
-vector4int const vector4intConst1 = {0, 1, 2, 3};
-vector4int const vector4intConst2 = {4, 5, 6, 7};
-vector8int const vector8intConst = {};
-
-vector4int vectorShuffle1 =
-    __builtin_shufflevector(vector4intConst1, vector4intConst2, 0, 1, 2, 3);
-vector4int vectorShuffle2 =
-    __builtin_shufflevector(vector4intConst1, vector4intConst2, 4, 5, 6, 7);
-vector4int vectorShuffle3 =
-    __builtin_shufflevector(vector4intConst1, vector4intConst2, -1, -1, -1, -1);
-vector4int vectorShuffle4 =
-    __builtin_shufflevector(vector4intConst1, vector4intConst2, 0, 2, 4, 6);
-vector8int vectorShuffle5 = __builtin_shufflevector(
-    vector8intConst, vector8intConst, 0, 2, 4, 6, 8, 10, 12, 14);
-vector4int vectorShuffle6 = __builtin_shufflevector(
-    vector8intConst, vector8intConst, 0, 2, 4, 6);
-vector8int vectorShuffle7 =
-    __builtin_shufflevector(vector4intConst1, vector4intConst2, 0, 2, 4, 6, 1, 3, 5, 7);
diff --git a/clang/test/Sema/constat_builtins_vector.cpp b/clang/test/Sema/constat_builtins_vector.cpp
new file mode 100644
index 00000000000000..afd5904e7afeab
--- /dev/null
+++ b/clang/test/Sema/constat_builtins_vector.cpp
@@ -0,0 +1,99 @@
+// RUN: %clang_cc1 -verify -std=c++2a -fsyntax-only %s
+// expected-no-diagnostics
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define LITTLE_END 1
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define LITTLE_END 0
+#else
+#error "huh?"
+#endif
+
+typedef double vector4double __attribute__((__vector_size__(32)));
+typedef float vector4float __attribute__((__vector_size__(16)));
+typedef long long vector4long __attribute__((__vector_size__(32)));
+typedef int vector4int __attribute__((__vector_size__(16)));
+typedef short vector4short __attribute__((__vector_size__(8)));
+typedef char vector4char __attribute__((__vector_size__(4)));
+typedef double vector8double __attribute__((__vector_size__(64)));
+typedef float vector8float __attribute__((__vector_size__(32)));
+typedef long long vector8long __attribute__((__vector_size__(64)));
+typedef int vector8int __attribute__((__vector_size__(32)));
+typedef short vector8short __attribute__((__vector_size__(16)));
+typedef char vector8char __attribute__((__vector_size__(8)));
+
+#define CHECK_NUM(__size, __typeFrom, __typeTo, ...)                            \
+  constexpr vector##__size##__typeTo                                            \
+      from_##vector##__size##__typeFrom##_to_##vector##__size##__typeTo##_var = \
+          __builtin_convertvector((vector##__size##__typeFrom){__VA_ARGS__},    \
+                                  vector##__size##__typeTo);
+#define CHECK_TO_ALL_TYPES(__size, __typeFrom, ...)                            \
+  CHECK_NUM(__size, __typeFrom, double, __VA_ARGS__)                           \
+  CHECK_NUM(__size, __typeFrom, float, __VA_ARGS__)                            \
+  CHECK_NUM(__size, __typeFrom, long, __VA_ARGS__)                             \
+  CHECK_NUM(__size, __typeFrom, int, __VA_ARGS__)                              \
+  CHECK_NUM(__size, __typeFrom, short, __VA_ARGS__)                            \
+  CHECK_NUM(__size, __typeFrom, char, __VA_ARGS__)                             \
+  static_assert(                                                               \
+      __builtin_bit_cast(                                                      \
+          unsigned,                                                            \
+          __builtin_shufflevector(                                             \
+              from_vector##__size##__typeFrom##_to_vector##__size##char_var,   \
+              from_vector##__size##__typeFrom##_to_vector##__size##char_var,   \
+              0, 1, 2, 3)) == (LITTLE_END ? 0x03020100 : 0x00010203));         \
+  static_assert(                                                               \
+      __builtin_bit_cast(                                                      \
+          unsigned long long,                                                  \
+          __builtin_shufflevector(                                             \
+              from_vector##__size##__typeFrom##_to_vector##__size##short_var,  \
+              from_vector##__size##__typeFrom##_to_vector##__size##short_var,  \
+              0, 1, 2, 3)) ==                                                  \
+      (LITTLE_END ? 0x0003000200010000 : 0x0000000100020003));
+
+#define CHECK_ALL_COMBINATIONS(__size, ...)                                    \
+  CHECK_TO_ALL_TYPES(__size, double, __VA_ARGS__)                              \
+  CHECK_TO_ALL_TYPES(__size, float, __VA_ARGS__)                               \
+  CHECK_TO_ALL_TYPES(__size, long, __VA_ARGS__)                                \
+  CHECK_TO_ALL_TYPES(__size, int, __VA_ARGS__)                                 \
+  CHECK_TO_ALL_TYPES(__size, short, __VA_ARGS__)                               \
+  CHECK_TO_ALL_TYPES(__size, char, __VA_ARGS__)
+
+CHECK_ALL_COMBINATIONS(4, 0, 1, 2, 3);
+CHECK_ALL_COMBINATIONS(8, 0, 1, 2, 3, 4, 5, 6, 7);
+#undef CHECK_ALL_COMBINATIONS
+#undef CHECK_TO_ALL_TYPES
+#undef CHECK_NUM
+
+// Shuffle vector
+constexpr vector4char vector4charConst1 = {0, 1, 2, 3};
+constexpr vector4char vector4charConst2 = {4, 5, 6, 7};
+constexpr vector8char vector8intConst = {8, 9, 10, 11, 12, 13, 14, 15};
+
+constexpr vector4char vectorShuffle1 =
+    __builtin_shufflevector(vector4charConst1, vector4charConst2, 0, 1, 2, 3);
+static_assert(__builtin_bit_cast(unsigned, vectorShuffle1) ==
+              (LITTLE_END ? 0x03020100 : 0x00010203));
+constexpr vector4char vectorShuffle2 =
+    __builtin_shufflevector(vector4charConst1, vector4charConst2, 4, 5, 6, 7);
+static_assert(__builtin_bit_cast(unsigned, vectorShuffle2) ==
+              (LITTLE_END ? 0x07060504 : 0x04050607));
+constexpr vector4char vectorShuffle3 = __builtin_shufflevector(
+    vector4charConst1, vector4charConst2, -1, -1, -1, -1);
+static_assert(__builtin_bit_cast(unsigned, vectorShuffle3) ==
+              (LITTLE_END ? 0x00000000 : 0x00000000));
+constexpr vector4char vectorShuffle4 =
+    __builtin_shufflevector(vector4charConst1, vector4charConst2, 0, 2, 4, 6);
+static_assert(__builtin_bit_cast(unsigned, vectorShuffle4) ==
+              (LITTLE_END ? 0x06040200 : 0x00020406));
+constexpr vector8char vectorShuffle5 = __builtin_shufflevector(
+    vector8intConst, vector8intConst, 0, 2, 4, 6, 8, 10, 12, 14);
+static_assert(__builtin_bit_cast(unsigned long long, vectorShuffle5) ==
+              (LITTLE_END ? 0x0E0C0A080E0C0A08 : 0x080A0C0E080A0C0E));
+constexpr vector4char vectorShuffle6 =
+    __builtin_shufflevector(vector8intConst, vector8intConst, 0, 2, 4, 6);
+static_assert(__builtin_bit_cast(unsigned, vectorShuffle6) ==
+              (LITTLE_END ? 0x0E0C0A08 : 0x080A0C0E));
+constexpr vector8char vectorShuffle7 = __builtin_shufflevector(
+    vector4charConst1, vector4charConst2, 0, 2, 4, 6, 1, 3, 5, 7);
+static_assert(__builtin_bit_cast(unsigned long long, vectorShuffle7) ==
+              (LITTLE_END ? 0x0705030106040200 : 0x0002040601030507));

>From a233392961173aa32aab89d2d5fce20f956a0d67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pol=20Marcet=20Sard=C3=A0?= <polmarcetsarda at gmail.com>
Date: Thu, 4 Jan 2024 12:31:08 +0100
Subject: [PATCH 3/5] Fix typo in file name

---
 .../{constat_builtins_vector.cpp => constant_builtins_vector.cpp} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename clang/test/Sema/{constat_builtins_vector.cpp => constant_builtins_vector.cpp} (100%)

diff --git a/clang/test/Sema/constat_builtins_vector.cpp b/clang/test/Sema/constant_builtins_vector.cpp
similarity index 100%
rename from clang/test/Sema/constat_builtins_vector.cpp
rename to clang/test/Sema/constant_builtins_vector.cpp

>From 25f3a1ad5e2ff86c504aab33a6afff0e482b0880 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pol=20Marcet=20Sard=C3=A0?= <polmarcetsarda at gmail.com>
Date: Sun, 31 Mar 2024 18:18:45 +0200
Subject: [PATCH 4/5] Following the review of sethp, I have made the following
 changes:

-- Added diagnostic for the undefined shuffle of -1
-- Validated support for _BitInt
-- A bunch of other minor tweaks here and there
---
 .../clang/Basic/DiagnosticSemaKinds.td        |  2 +
 clang/lib/AST/ExprConstant.cpp                | 69 ++++++++-----------
 clang/test/Sema/constant_builtins_vector.cpp  | 51 ++++++++++----
 clang/test/Sema/convertvector.c               |  3 +
 4 files changed, 70 insertions(+), 55 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index df57f5e6ce11ba..ea93158db8107c 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10318,6 +10318,8 @@ def err_shufflevector_nonconstant_argument : Error<
 def err_shufflevector_argument_too_large : Error<
   "index for __builtin_shufflevector must be less than the total number "
   "of vector elements">;
+def err_shufflevector_minus_one_is_undefined_behavior_constexpr : Error<
+  "index for __builtin_shufflevector must be within the bounds of the input vectors in a constexpr context. An index of -1 at position %0 was found. -1 is only allowed at runtime.">;
 
 def err_convertvector_non_vector : Error<
   "first argument to __builtin_convertvector must be a vector">;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 0ff015acb2d202..7e817be7f5cf13 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -2706,8 +2706,11 @@ static bool checkFloatingPointResult(EvalInfo &Info, const Expr *E,
 static bool HandleFloatToFloatCast(EvalInfo &Info, const Expr *E,
                                    QualType SrcType, QualType DestType,
                                    APFloat &Result) {
-  assert(isa<CastExpr>(E) || isa<CompoundAssignOperator>(E) ||
-         isa<ConvertVectorExpr>(E));
+  assert((isa<CastExpr>(E) || isa<CompoundAssignOperator>(E) ||
+          isa<ConvertVectorExpr>(E)) &&
+         "HandleFloatToFloatCast has been checked with only CastExpr, "
+         "CompoundAssignOperator and ConvertVectorExpr. Please either validate "
+         "the new expression or address the root cause of this usage.");
   llvm::RoundingMode RM = getActiveRoundingMode(Info, E);
   APFloat::opStatus St;
   APFloat Value = Result;
@@ -10714,7 +10717,7 @@ namespace {
     bool VisitShuffleVectorExpr(const ShuffleVectorExpr *E);
 
     // FIXME: Missing: conditional operator (for GNU
-    //                 conditional select), shufflevector, ExtVectorElementExpr
+    //                 conditional select), ExtVectorElementExpr
   };
 } // end anonymous namespace
 
@@ -10965,27 +10968,10 @@ bool VectorExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) {
   return Success(APValue(ResultElements.data(), ResultElements.size()), E);
 }
 
-static bool EvaluateVectorOrLValue(APValue &Result, EvalInfo &Info,
-                                   const Expr *E, const QualType &Type) {
-  if (!Evaluate(Result, Info, E))
-    return false;
-
-  if (Result.isLValue()) {
-    // Source of the data is an lvalue; Manually handle the lvalue as if
-    // it was an rvalue to get the current APValue.
-    LValue LValueFound;
-    LValueFound.setFrom(Info.Ctx, Result);
-    if (!handleLValueToRValueConversion(Info, E, Type, LValueFound, Result))
-      return false;
-  }
-
-  return Result.isVector();
-}
-
-static bool handleVectorConversion(EvalInfo &Info, const FPOptions FPO,
-                                   const Expr *E, QualType SourceTy,
-                                   QualType DestTy, APValue const &Original,
-                                   APValue &Result) {
+static bool handleVectorElementCast(EvalInfo &Info, const FPOptions FPO,
+                                    const Expr *E, QualType SourceTy,
+                                    QualType DestTy, APValue const &Original,
+                                    APValue &Result) {
   if (SourceTy->isIntegerType()) {
     if (DestTy->isRealFloatingType()) {
       Result = APValue(APFloat(0.0));
@@ -11015,7 +11001,7 @@ static bool handleVectorConversion(EvalInfo &Info, const FPOptions FPO,
 bool VectorExprEvaluator::VisitConvertVectorExpr(const ConvertVectorExpr *E) {
   APValue Source;
   QualType SourceVecType = E->getSrcExpr()->getType();
-  if (!EvaluateVectorOrLValue(Source, Info, E->getSrcExpr(), SourceVecType))
+  if (!EvaluateAsRValue(Info, E->getSrcExpr(), Source))
     return false;
 
   QualType DestTy = E->getType()->castAs<VectorType>()->getElementType();
@@ -11028,8 +11014,8 @@ bool VectorExprEvaluator::VisitConvertVectorExpr(const ConvertVectorExpr *E) {
   ResultElements.reserve(SourceLen);
   for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
     APValue Elt;
-    if (!handleVectorConversion(Info, FPO, E, SourceTy, DestTy,
-                                Source.getVectorElt(EltNum), Elt))
+    if (!handleVectorElementCast(Info, FPO, E, SourceTy, DestTy,
+                                 Source.getVectorElt(EltNum), Elt))
       return false;
     ResultElements.push_back(std::move(Elt));
   }
@@ -11041,21 +11027,24 @@ static bool handleVectorShuffle(EvalInfo &Info, const ShuffleVectorExpr *E,
                                 QualType ElemType, APValue const &VecVal1,
                                 APValue const &VecVal2, unsigned EltNum,
                                 APValue &Result) {
-  unsigned const TotalElementsInAVector = VecVal1.getVectorLength();
-
-  Expr const *IndexExpr = E->getExpr(2 + EltNum);
-  APSInt IndexVal;
-  if (!EvaluateInteger(IndexExpr, IndexVal, Info))
-    return false;
+  unsigned const TotalElementsInInputVector = VecVal1.getVectorLength();
 
-  uint32_t index = IndexVal.getZExtValue();
+  APSInt IndexVal = E->getShuffleMaskIdx(Info.Ctx, EltNum);
+  int64_t index = IndexVal.getExtValue();
   // The spec says that -1 should be treated as undef for optimizations,
   // but in constexpr we need to choose a value. We'll choose 0.
-  if (index >= TotalElementsInAVector * 2)
-    index = 0;
+  if (index == -1) {
+    Info.FFDiag(
+        E, diag::err_shufflevector_minus_one_is_undefined_behavior_constexpr)
+        << EltNum;
+    return false;
+  }
+
+  if (index < 0 || index >= TotalElementsInInputVector * 2)
+    llvm_unreachable("Out of bounds shuffle index");
 
-  if (index >= TotalElementsInAVector)
-    Result = VecVal2.getVectorElt(index - TotalElementsInAVector);
+  if (index >= TotalElementsInInputVector)
+    Result = VecVal2.getVectorElt(index - TotalElementsInInputVector);
   else
     Result = VecVal1.getVectorElt(index);
   return true;
@@ -11064,11 +11053,11 @@ static bool handleVectorShuffle(EvalInfo &Info, const ShuffleVectorExpr *E,
 bool VectorExprEvaluator::VisitShuffleVectorExpr(const ShuffleVectorExpr *E) {
   APValue VecVal1;
   const Expr *Vec1 = E->getExpr(0);
-  if (!EvaluateVectorOrLValue(VecVal1, Info, Vec1, Vec1->getType()))
+  if (!EvaluateAsRValue(Info, Vec1, VecVal1))
     return false;
   APValue VecVal2;
   const Expr *Vec2 = E->getExpr(1);
-  if (!EvaluateVectorOrLValue(VecVal2, Info, Vec2, Vec2->getType()))
+  if (!EvaluateAsRValue(Info, Vec2, VecVal2))
     return false;
 
   VectorType const *DestVecTy = E->getType()->castAs<VectorType>();
diff --git a/clang/test/Sema/constant_builtins_vector.cpp b/clang/test/Sema/constant_builtins_vector.cpp
index afd5904e7afeab..73f8eed22abcbd 100644
--- a/clang/test/Sema/constant_builtins_vector.cpp
+++ b/clang/test/Sema/constant_builtins_vector.cpp
@@ -1,5 +1,4 @@
-// RUN: %clang_cc1 -verify -std=c++2a -fsyntax-only %s
-// expected-no-diagnostics
+// RUN: %clang_cc1 -verify -std=c++2a -fsyntax-only -Wno-bit-int-extension %s
 
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define LITTLE_END 1
@@ -9,18 +8,29 @@
 #error "huh?"
 #endif
 
+// We also support _BitInt as long as its bit width is >= 8 and a power of 2.
+typedef _BitInt(8) BitInt8;
+typedef _BitInt(32) BitInt32;
+typedef _BitInt(128) BitInt128;
+
 typedef double vector4double __attribute__((__vector_size__(32)));
 typedef float vector4float __attribute__((__vector_size__(16)));
 typedef long long vector4long __attribute__((__vector_size__(32)));
 typedef int vector4int __attribute__((__vector_size__(16)));
 typedef short vector4short __attribute__((__vector_size__(8)));
 typedef char vector4char __attribute__((__vector_size__(4)));
+typedef BitInt8 vector4BitInt8 __attribute__((__vector_size__(4)));
+typedef BitInt32 vector4BitInt32 __attribute__((__vector_size__(16)));
+typedef BitInt128 vector4BitInt128 __attribute__((__vector_size__(64)));
 typedef double vector8double __attribute__((__vector_size__(64)));
 typedef float vector8float __attribute__((__vector_size__(32)));
 typedef long long vector8long __attribute__((__vector_size__(64)));
 typedef int vector8int __attribute__((__vector_size__(32)));
 typedef short vector8short __attribute__((__vector_size__(16)));
 typedef char vector8char __attribute__((__vector_size__(8)));
+typedef BitInt8 vector8BitInt8 __attribute__((__vector_size__(8)));
+typedef BitInt32 vector8BitInt32 __attribute__((__vector_size__(32)));
+typedef BitInt128 vector8BitInt128 __attribute__((__vector_size__(128)));
 
 #define CHECK_NUM(__size, __typeFrom, __typeTo, ...)                            \
   constexpr vector##__size##__typeTo                                            \
@@ -34,6 +44,9 @@ typedef char vector8char __attribute__((__vector_size__(8)));
   CHECK_NUM(__size, __typeFrom, int, __VA_ARGS__)                              \
   CHECK_NUM(__size, __typeFrom, short, __VA_ARGS__)                            \
   CHECK_NUM(__size, __typeFrom, char, __VA_ARGS__)                             \
+  CHECK_NUM(__size, __typeFrom, BitInt8, __VA_ARGS__)                          \
+  CHECK_NUM(__size, __typeFrom, BitInt32, __VA_ARGS__)                         \
+  CHECK_NUM(__size, __typeFrom, BitInt128, __VA_ARGS__)                        \
   static_assert(                                                               \
       __builtin_bit_cast(                                                      \
           unsigned,                                                            \
@@ -56,7 +69,10 @@ typedef char vector8char __attribute__((__vector_size__(8)));
   CHECK_TO_ALL_TYPES(__size, long, __VA_ARGS__)                                \
   CHECK_TO_ALL_TYPES(__size, int, __VA_ARGS__)                                 \
   CHECK_TO_ALL_TYPES(__size, short, __VA_ARGS__)                               \
-  CHECK_TO_ALL_TYPES(__size, char, __VA_ARGS__)
+  CHECK_TO_ALL_TYPES(__size, char, __VA_ARGS__)                                \
+  CHECK_TO_ALL_TYPES(__size, BitInt8, __VA_ARGS__)                             \
+  CHECK_TO_ALL_TYPES(__size, BitInt32, __VA_ARGS__)                            \
+  CHECK_TO_ALL_TYPES(__size, BitInt128, __VA_ARGS__)
 
 CHECK_ALL_COMBINATIONS(4, 0, 1, 2, 3);
 CHECK_ALL_COMBINATIONS(8, 0, 1, 2, 3, 4, 5, 6, 7);
@@ -77,23 +93,28 @@ constexpr vector4char vectorShuffle2 =
     __builtin_shufflevector(vector4charConst1, vector4charConst2, 4, 5, 6, 7);
 static_assert(__builtin_bit_cast(unsigned, vectorShuffle2) ==
               (LITTLE_END ? 0x07060504 : 0x04050607));
-constexpr vector4char vectorShuffle3 = __builtin_shufflevector(
-    vector4charConst1, vector4charConst2, -1, -1, -1, -1);
-static_assert(__builtin_bit_cast(unsigned, vectorShuffle3) ==
-              (LITTLE_END ? 0x00000000 : 0x00000000));
-constexpr vector4char vectorShuffle4 =
+constexpr vector4char vectorShuffle3 =
     __builtin_shufflevector(vector4charConst1, vector4charConst2, 0, 2, 4, 6);
-static_assert(__builtin_bit_cast(unsigned, vectorShuffle4) ==
+static_assert(__builtin_bit_cast(unsigned, vectorShuffle3) ==
               (LITTLE_END ? 0x06040200 : 0x00020406));
-constexpr vector8char vectorShuffle5 = __builtin_shufflevector(
+constexpr vector8char vectorShuffle4 = __builtin_shufflevector(
     vector8intConst, vector8intConst, 0, 2, 4, 6, 8, 10, 12, 14);
-static_assert(__builtin_bit_cast(unsigned long long, vectorShuffle5) ==
+static_assert(__builtin_bit_cast(unsigned long long, vectorShuffle4) ==
               (LITTLE_END ? 0x0E0C0A080E0C0A08 : 0x080A0C0E080A0C0E));
-constexpr vector4char vectorShuffle6 =
+constexpr vector4char vectorShuffle5 =
     __builtin_shufflevector(vector8intConst, vector8intConst, 0, 2, 4, 6);
-static_assert(__builtin_bit_cast(unsigned, vectorShuffle6) ==
+static_assert(__builtin_bit_cast(unsigned, vectorShuffle5) ==
               (LITTLE_END ? 0x0E0C0A08 : 0x080A0C0E));
-constexpr vector8char vectorShuffle7 = __builtin_shufflevector(
+constexpr vector8char vectorShuffle6 = __builtin_shufflevector(
     vector4charConst1, vector4charConst2, 0, 2, 4, 6, 1, 3, 5, 7);
-static_assert(__builtin_bit_cast(unsigned long long, vectorShuffle7) ==
+static_assert(__builtin_bit_cast(unsigned long long, vectorShuffle6) ==
               (LITTLE_END ? 0x0705030106040200 : 0x0002040601030507));
+
+constexpr vector4char
+    vectorShuffleFail1 = // expected-error {{constexpr variable 'vectorShuffleFail1'\
+ must be initialized by a constant expression}}
+    __builtin_shufflevector( // expected-error {{index for __builtin_shufflevector must be within\
+ the bounds of the input vectors in a constexpr context. An index of\
+ -1 at position 0 was found. -1 is only allowed at runtime.}}
+        vector4charConst1,
+        vector4charConst2, -1, -1, -1, -1);
diff --git a/clang/test/Sema/convertvector.c b/clang/test/Sema/convertvector.c
index 8ae43c3ba3d493..1ff04af9098185 100644
--- a/clang/test/Sema/convertvector.c
+++ b/clang/test/Sema/convertvector.c
@@ -15,3 +15,6 @@ vector8float foo3(double x) {
   return __builtin_convertvector(x, vector8float);  // expected-error {{must be a vector}}
 }
 
+float foo4(float x) {
+  return __builtin_convertvector(x, float); // expected-error {{first argument to __builtin_convertvector must be a vector}}
+}

>From d0220bede29383282624a9d1ce3dbf0bfe832d9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pol=20Marcet=20Sard=C3=A0?= <polmarcetsarda at gmail.com>
Date: Sat, 20 Apr 2024 12:19:49 +0200
Subject: [PATCH 5/5] Address some misc comments; add a diagnostic and
 expand macros in the tests.

---
 clang/docs/LanguageExtensions.rst             |   3 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |   4 +-
 clang/lib/AST/ExprConstant.cpp                |  20 +-
 clang/test/Sema/constant_builtins_vector.cpp  | 613 +++++++++++++++++-
 4 files changed, 625 insertions(+), 15 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 485e4af69b3e02..4cb610bfed550e 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -2952,7 +2952,7 @@ for the implementation of various target-specific header files like
   // Concatenate every other element of 8-element vectors V1 and V2.
   __builtin_shufflevector(V1, V2, 0, 2, 4, 6, 8, 10, 12, 14)
 
-  // Shuffle v1 with some elements being undefined
+  // Shuffle v1 with some elements being undefined. Not allowed in constexpr.
   __builtin_shufflevector(v1, v1, 3, -1, 1, -1)
 
 **Description**:
@@ -2965,6 +2965,7 @@ starting with the first vector, continuing into the second vector.  Thus, if
 ``vec1`` is a 4-element vector, index 5 would refer to the second element of
 ``vec2``. An index of -1 can be used to indicate that the corresponding element
 in the returned vector is a don't care and can be optimized by the backend.
+Values of -1 are not supported in constant expressions.
 
 The result of ``__builtin_shufflevector`` is a vector with the same element
 type as ``vec1``/``vec2`` but that has an element count equal to the number of
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index ea93158db8107c..5adc8d1a380650 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10319,10 +10319,12 @@ def err_shufflevector_argument_too_large : Error<
   "index for __builtin_shufflevector must be less than the total number "
   "of vector elements">;
 def err_shufflevector_minus_one_is_undefined_behavior_constexpr : Error<
-  "index for __builtin_shufflevector must be within the bounds of the input vectors in a constexpr context. An index of -1 at position %0 was found. -1 is only allowed at runtime.">;
+  "index for __builtin_shufflevector not within the bounds of the input vectors; index of -1 found at position %0 not permitted in a constexpr context.">;
 
 def err_convertvector_non_vector : Error<
   "first argument to __builtin_convertvector must be a vector">;
+def err_convertvector_constexpr_unsupported_vector_cast : Error<
+  "unsupported vector cast from %0 to %1 in a constant expression.">;
 def err_builtin_non_vector_type : Error<
   "%0 argument to %1 must be of vector type">;
 def err_convertvector_incompatible_vector : Error<
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 7e817be7f5cf13..c9da9b3ef00fa4 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -10995,6 +10995,9 @@ static bool handleVectorElementCast(EvalInfo &Info, const FPOptions FPO,
                                   DestTy, Result.getInt());
     }
   }
+
+  Info.FFDiag(E, diag::err_convertvector_constexpr_unsupported_vector_cast)
+      << SourceTy << DestTy;
   return false;
 }
 
@@ -11027,12 +11030,15 @@ static bool handleVectorShuffle(EvalInfo &Info, const ShuffleVectorExpr *E,
                                 QualType ElemType, APValue const &VecVal1,
                                 APValue const &VecVal2, unsigned EltNum,
                                 APValue &Result) {
-  unsigned const TotalElementsInInputVector = VecVal1.getVectorLength();
+  unsigned const TotalElementsInInputVector1 = VecVal1.getVectorLength();
+  unsigned const TotalElementsInInputVector2 = VecVal2.getVectorLength();
 
   APSInt IndexVal = E->getShuffleMaskIdx(Info.Ctx, EltNum);
   int64_t index = IndexVal.getExtValue();
   // The spec says that -1 should be treated as undef for optimizations,
-  // but in constexpr we need to choose a value. We'll choose 0.
+  // but in constexpr we'd have to produce an APValue::Indeterminate,
+  // which is prohibited from being a top-level constant value. Emit a
+  // diagnostic instead.
   if (index == -1) {
     Info.FFDiag(
         E, diag::err_shufflevector_minus_one_is_undefined_behavior_constexpr)
@@ -11040,11 +11046,12 @@ static bool handleVectorShuffle(EvalInfo &Info, const ShuffleVectorExpr *E,
     return false;
   }
 
-  if (index < 0 || index >= TotalElementsInInputVector * 2)
+  if (index < 0 ||
+      index >= TotalElementsInInputVector1 + TotalElementsInInputVector2)
     llvm_unreachable("Out of bounds shuffle index");
 
-  if (index >= TotalElementsInInputVector)
-    Result = VecVal2.getVectorElt(index - TotalElementsInInputVector);
+  if (index >= TotalElementsInInputVector1)
+    Result = VecVal2.getVectorElt(index - TotalElementsInInputVector1);
   else
     Result = VecVal1.getVectorElt(index);
   return true;
@@ -11061,9 +11068,6 @@ bool VectorExprEvaluator::VisitShuffleVectorExpr(const ShuffleVectorExpr *E) {
     return false;
 
   VectorType const *DestVecTy = E->getType()->castAs<VectorType>();
-  if (!DestVecTy)
-    return false;
-
   QualType DestElTy = DestVecTy->getElementType();
 
   auto TotalElementsInOutputVector = DestVecTy->getNumElements();
diff --git a/clang/test/Sema/constant_builtins_vector.cpp b/clang/test/Sema/constant_builtins_vector.cpp
index 73f8eed22abcbd..68620d436fc43e 100644
--- a/clang/test/Sema/constant_builtins_vector.cpp
+++ b/clang/test/Sema/constant_builtins_vector.cpp
@@ -74,8 +74,611 @@ typedef BitInt128 vector8BitInt128 __attribute__((__vector_size__(128)));
   CHECK_TO_ALL_TYPES(__size, BitInt32, __VA_ARGS__)                            \
   CHECK_TO_ALL_TYPES(__size, BitInt128, __VA_ARGS__)
 
-CHECK_ALL_COMBINATIONS(4, 0, 1, 2, 3);
-CHECK_ALL_COMBINATIONS(8, 0, 1, 2, 3, 4, 5, 6, 7);
+// The declarations below are the expansion of these macro invocations. Use the
+// macros to regenerate the test cases if they ever need to change.
+// CHECK_ALL_COMBINATIONS(4, 0, 1, 2, 3);
+// CHECK_ALL_COMBINATIONS(8, 0, 1, 2, 3, 4, 5, 6, 7);
+
+constexpr vector4double from_vector4double_to_vector4double_var =
+    __builtin_convertvector((vector4double){0, 1, 2, 3}, vector4double);
+constexpr vector4float from_vector4double_to_vector4float_var =
+    __builtin_convertvector((vector4double){0, 1, 2, 3}, vector4float);
+constexpr vector4long from_vector4double_to_vector4long_var =
+    __builtin_convertvector((vector4double){0, 1, 2, 3}, vector4long);
+constexpr vector4int from_vector4double_to_vector4int_var =
+    __builtin_convertvector((vector4double){0, 1, 2, 3}, vector4int);
+constexpr vector4short from_vector4double_to_vector4short_var =
+    __builtin_convertvector((vector4double){0, 1, 2, 3}, vector4short);
+constexpr vector4char from_vector4double_to_vector4char_var =
+    __builtin_convertvector((vector4double){0, 1, 2, 3}, vector4char);
+constexpr vector4BitInt8 from_vector4double_to_vector4BitInt8_var =
+    __builtin_convertvector((vector4double){0, 1, 2, 3}, vector4BitInt8);
+constexpr vector4BitInt32 from_vector4double_to_vector4BitInt32_var =
+    __builtin_convertvector((vector4double){0, 1, 2, 3}, vector4BitInt32);
+constexpr vector4BitInt128 from_vector4double_to_vector4BitInt128_var =
+    __builtin_convertvector((vector4double){0, 1, 2, 3}, vector4BitInt128);
+static_assert(__builtin_bit_cast(
+                  unsigned,
+                  __builtin_shufflevector(from_vector4double_to_vector4char_var,
+                                          from_vector4double_to_vector4char_var,
+                                          0, 1, 2, 3)) ==
+              (1 ? 0x03020100 : 0x00010203));
+static_assert(__builtin_bit_cast(unsigned long long,
+                                 __builtin_shufflevector(
+                                     from_vector4double_to_vector4short_var,
+                                     from_vector4double_to_vector4short_var, 0,
+                                     1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003));
+constexpr vector4double from_vector4float_to_vector4double_var =
+    __builtin_convertvector((vector4float){0, 1, 2, 3}, vector4double);
+constexpr vector4float from_vector4float_to_vector4float_var =
+    __builtin_convertvector((vector4float){0, 1, 2, 3}, vector4float);
+constexpr vector4long from_vector4float_to_vector4long_var =
+    __builtin_convertvector((vector4float){0, 1, 2, 3}, vector4long);
+constexpr vector4int from_vector4float_to_vector4int_var =
+    __builtin_convertvector((vector4float){0, 1, 2, 3}, vector4int);
+constexpr vector4short from_vector4float_to_vector4short_var =
+    __builtin_convertvector((vector4float){0, 1, 2, 3}, vector4short);
+constexpr vector4char from_vector4float_to_vector4char_var =
+    __builtin_convertvector((vector4float){0, 1, 2, 3}, vector4char);
+constexpr vector4BitInt8 from_vector4float_to_vector4BitInt8_var =
+    __builtin_convertvector((vector4float){0, 1, 2, 3}, vector4BitInt8);
+constexpr vector4BitInt32 from_vector4float_to_vector4BitInt32_var =
+    __builtin_convertvector((vector4float){0, 1, 2, 3}, vector4BitInt32);
+constexpr vector4BitInt128 from_vector4float_to_vector4BitInt128_var =
+    __builtin_convertvector((vector4float){0, 1, 2, 3}, vector4BitInt128);
+static_assert(__builtin_bit_cast(unsigned,
+                                 __builtin_shufflevector(
+                                     from_vector4float_to_vector4char_var,
+                                     from_vector4float_to_vector4char_var, 0, 1,
+                                     2, 3)) == (1 ? 0x03020100 : 0x00010203));
+static_assert(__builtin_bit_cast(
+                  unsigned long long,
+                  __builtin_shufflevector(from_vector4float_to_vector4short_var,
+                                          from_vector4float_to_vector4short_var,
+                                          0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003));
+constexpr vector4double from_vector4long_to_vector4double_var =
+    __builtin_convertvector((vector4long){0, 1, 2, 3}, vector4double);
+constexpr vector4float from_vector4long_to_vector4float_var =
+    __builtin_convertvector((vector4long){0, 1, 2, 3}, vector4float);
+constexpr vector4long from_vector4long_to_vector4long_var =
+    __builtin_convertvector((vector4long){0, 1, 2, 3}, vector4long);
+constexpr vector4int from_vector4long_to_vector4int_var =
+    __builtin_convertvector((vector4long){0, 1, 2, 3}, vector4int);
+constexpr vector4short from_vector4long_to_vector4short_var =
+    __builtin_convertvector((vector4long){0, 1, 2, 3}, vector4short);
+constexpr vector4char from_vector4long_to_vector4char_var =
+    __builtin_convertvector((vector4long){0, 1, 2, 3}, vector4char);
+constexpr vector4BitInt8 from_vector4long_to_vector4BitInt8_var =
+    __builtin_convertvector((vector4long){0, 1, 2, 3}, vector4BitInt8);
+constexpr vector4BitInt32 from_vector4long_to_vector4BitInt32_var =
+    __builtin_convertvector((vector4long){0, 1, 2, 3}, vector4BitInt32);
+constexpr vector4BitInt128 from_vector4long_to_vector4BitInt128_var =
+    __builtin_convertvector((vector4long){0, 1, 2, 3}, vector4BitInt128);
+static_assert(__builtin_bit_cast(unsigned,
+                                 __builtin_shufflevector(
+                                     from_vector4long_to_vector4char_var,
+                                     from_vector4long_to_vector4char_var, 0, 1,
+                                     2, 3)) == (1 ? 0x03020100 : 0x00010203));
+static_assert(__builtin_bit_cast(
+                  unsigned long long,
+                  __builtin_shufflevector(from_vector4long_to_vector4short_var,
+                                          from_vector4long_to_vector4short_var,
+                                          0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003));
+constexpr vector4double from_vector4int_to_vector4double_var =
+    __builtin_convertvector((vector4int){0, 1, 2, 3}, vector4double);
+constexpr vector4float from_vector4int_to_vector4float_var =
+    __builtin_convertvector((vector4int){0, 1, 2, 3}, vector4float);
+constexpr vector4long from_vector4int_to_vector4long_var =
+    __builtin_convertvector((vector4int){0, 1, 2, 3}, vector4long);
+constexpr vector4int from_vector4int_to_vector4int_var =
+    __builtin_convertvector((vector4int){0, 1, 2, 3}, vector4int);
+constexpr vector4short from_vector4int_to_vector4short_var =
+    __builtin_convertvector((vector4int){0, 1, 2, 3}, vector4short);
+constexpr vector4char from_vector4int_to_vector4char_var =
+    __builtin_convertvector((vector4int){0, 1, 2, 3}, vector4char);
+constexpr vector4BitInt8 from_vector4int_to_vector4BitInt8_var =
+    __builtin_convertvector((vector4int){0, 1, 2, 3}, vector4BitInt8);
+constexpr vector4BitInt32 from_vector4int_to_vector4BitInt32_var =
+    __builtin_convertvector((vector4int){0, 1, 2, 3}, vector4BitInt32);
+constexpr vector4BitInt128 from_vector4int_to_vector4BitInt128_var =
+    __builtin_convertvector((vector4int){0, 1, 2, 3}, vector4BitInt128);
+static_assert(__builtin_bit_cast(unsigned,
+                                 __builtin_shufflevector(
+                                     from_vector4int_to_vector4char_var,
+                                     from_vector4int_to_vector4char_var, 0, 1,
+                                     2, 3)) == (1 ? 0x03020100 : 0x00010203));
+static_assert(__builtin_bit_cast(
+                  unsigned long long,
+                  __builtin_shufflevector(from_vector4int_to_vector4short_var,
+                                          from_vector4int_to_vector4short_var,
+                                          0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003));
+constexpr vector4double from_vector4short_to_vector4double_var =
+    __builtin_convertvector((vector4short){0, 1, 2, 3}, vector4double);
+constexpr vector4float from_vector4short_to_vector4float_var =
+    __builtin_convertvector((vector4short){0, 1, 2, 3}, vector4float);
+constexpr vector4long from_vector4short_to_vector4long_var =
+    __builtin_convertvector((vector4short){0, 1, 2, 3}, vector4long);
+constexpr vector4int from_vector4short_to_vector4int_var =
+    __builtin_convertvector((vector4short){0, 1, 2, 3}, vector4int);
+constexpr vector4short from_vector4short_to_vector4short_var =
+    __builtin_convertvector((vector4short){0, 1, 2, 3}, vector4short);
+constexpr vector4char from_vector4short_to_vector4char_var =
+    __builtin_convertvector((vector4short){0, 1, 2, 3}, vector4char);
+constexpr vector4BitInt8 from_vector4short_to_vector4BitInt8_var =
+    __builtin_convertvector((vector4short){0, 1, 2, 3}, vector4BitInt8);
+constexpr vector4BitInt32 from_vector4short_to_vector4BitInt32_var =
+    __builtin_convertvector((vector4short){0, 1, 2, 3}, vector4BitInt32);
+constexpr vector4BitInt128 from_vector4short_to_vector4BitInt128_var =
+    __builtin_convertvector((vector4short){0, 1, 2, 3}, vector4BitInt128);
+static_assert(__builtin_bit_cast(unsigned,
+                                 __builtin_shufflevector(
+                                     from_vector4short_to_vector4char_var,
+                                     from_vector4short_to_vector4char_var, 0, 1,
+                                     2, 3)) == (1 ? 0x03020100 : 0x00010203));
+static_assert(__builtin_bit_cast(
+                  unsigned long long,
+                  __builtin_shufflevector(from_vector4short_to_vector4short_var,
+                                          from_vector4short_to_vector4short_var,
+                                          0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003));
+constexpr vector4double from_vector4char_to_vector4double_var =
+    __builtin_convertvector((vector4char){0, 1, 2, 3}, vector4double);
+constexpr vector4float from_vector4char_to_vector4float_var =
+    __builtin_convertvector((vector4char){0, 1, 2, 3}, vector4float);
+constexpr vector4long from_vector4char_to_vector4long_var =
+    __builtin_convertvector((vector4char){0, 1, 2, 3}, vector4long);
+constexpr vector4int from_vector4char_to_vector4int_var =
+    __builtin_convertvector((vector4char){0, 1, 2, 3}, vector4int);
+constexpr vector4short from_vector4char_to_vector4short_var =
+    __builtin_convertvector((vector4char){0, 1, 2, 3}, vector4short);
+constexpr vector4char from_vector4char_to_vector4char_var =
+    __builtin_convertvector((vector4char){0, 1, 2, 3}, vector4char);
+constexpr vector4BitInt8 from_vector4char_to_vector4BitInt8_var =
+    __builtin_convertvector((vector4char){0, 1, 2, 3}, vector4BitInt8);
+constexpr vector4BitInt32 from_vector4char_to_vector4BitInt32_var =
+    __builtin_convertvector((vector4char){0, 1, 2, 3}, vector4BitInt32);
+constexpr vector4BitInt128 from_vector4char_to_vector4BitInt128_var =
+    __builtin_convertvector((vector4char){0, 1, 2, 3}, vector4BitInt128);
+static_assert(__builtin_bit_cast(unsigned,
+                                 __builtin_shufflevector(
+                                     from_vector4char_to_vector4char_var,
+                                     from_vector4char_to_vector4char_var, 0, 1,
+                                     2, 3)) == (1 ? 0x03020100 : 0x00010203));
+static_assert(__builtin_bit_cast(
+                  unsigned long long,
+                  __builtin_shufflevector(from_vector4char_to_vector4short_var,
+                                          from_vector4char_to_vector4short_var,
+                                          0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003));
+constexpr vector4double from_vector4BitInt8_to_vector4double_var =
+    __builtin_convertvector((vector4BitInt8){0, 1, 2, 3}, vector4double);
+constexpr vector4float from_vector4BitInt8_to_vector4float_var =
+    __builtin_convertvector((vector4BitInt8){0, 1, 2, 3}, vector4float);
+constexpr vector4long from_vector4BitInt8_to_vector4long_var =
+    __builtin_convertvector((vector4BitInt8){0, 1, 2, 3}, vector4long);
+constexpr vector4int from_vector4BitInt8_to_vector4int_var =
+    __builtin_convertvector((vector4BitInt8){0, 1, 2, 3}, vector4int);
+constexpr vector4short from_vector4BitInt8_to_vector4short_var =
+    __builtin_convertvector((vector4BitInt8){0, 1, 2, 3}, vector4short);
+constexpr vector4char from_vector4BitInt8_to_vector4char_var =
+    __builtin_convertvector((vector4BitInt8){0, 1, 2, 3}, vector4char);
+constexpr vector4BitInt8 from_vector4BitInt8_to_vector4BitInt8_var =
+    __builtin_convertvector((vector4BitInt8){0, 1, 2, 3}, vector4BitInt8);
+constexpr vector4BitInt32 from_vector4BitInt8_to_vector4BitInt32_var =
+    __builtin_convertvector((vector4BitInt8){0, 1, 2, 3}, vector4BitInt32);
+constexpr vector4BitInt128 from_vector4BitInt8_to_vector4BitInt128_var =
+    __builtin_convertvector((vector4BitInt8){0, 1, 2, 3}, vector4BitInt128);
+static_assert(__builtin_bit_cast(unsigned,
+                                 __builtin_shufflevector(
+                                     from_vector4BitInt8_to_vector4char_var,
+                                     from_vector4BitInt8_to_vector4char_var, 0,
+                                     1, 2, 3)) ==
+              (1 ? 0x03020100 : 0x00010203));
+static_assert(__builtin_bit_cast(unsigned long long,
+                                 __builtin_shufflevector(
+                                     from_vector4BitInt8_to_vector4short_var,
+                                     from_vector4BitInt8_to_vector4short_var, 0,
+                                     1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003));
+constexpr vector4double from_vector4BitInt32_to_vector4double_var =
+    __builtin_convertvector((vector4BitInt32){0, 1, 2, 3}, vector4double);
+constexpr vector4float from_vector4BitInt32_to_vector4float_var =
+    __builtin_convertvector((vector4BitInt32){0, 1, 2, 3}, vector4float);
+constexpr vector4long from_vector4BitInt32_to_vector4long_var =
+    __builtin_convertvector((vector4BitInt32){0, 1, 2, 3}, vector4long);
+constexpr vector4int from_vector4BitInt32_to_vector4int_var =
+    __builtin_convertvector((vector4BitInt32){0, 1, 2, 3}, vector4int);
+constexpr vector4short from_vector4BitInt32_to_vector4short_var =
+    __builtin_convertvector((vector4BitInt32){0, 1, 2, 3}, vector4short);
+constexpr vector4char from_vector4BitInt32_to_vector4char_var =
+    __builtin_convertvector((vector4BitInt32){0, 1, 2, 3}, vector4char);
+constexpr vector4BitInt8 from_vector4BitInt32_to_vector4BitInt8_var =
+    __builtin_convertvector((vector4BitInt32){0, 1, 2, 3}, vector4BitInt8);
+constexpr vector4BitInt32 from_vector4BitInt32_to_vector4BitInt32_var =
+    __builtin_convertvector((vector4BitInt32){0, 1, 2, 3}, vector4BitInt32);
+constexpr vector4BitInt128 from_vector4BitInt32_to_vector4BitInt128_var =
+    __builtin_convertvector((vector4BitInt32){0, 1, 2, 3}, vector4BitInt128);
+static_assert(__builtin_bit_cast(unsigned,
+                                 __builtin_shufflevector(
+                                     from_vector4BitInt32_to_vector4char_var,
+                                     from_vector4BitInt32_to_vector4char_var, 0,
+                                     1, 2, 3)) ==
+              (1 ? 0x03020100 : 0x00010203));
+static_assert(__builtin_bit_cast(unsigned long long,
+                                 __builtin_shufflevector(
+                                     from_vector4BitInt32_to_vector4short_var,
+                                     from_vector4BitInt32_to_vector4short_var,
+                                     0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003));
+constexpr vector4double from_vector4BitInt128_to_vector4double_var = // group: constant-evaluate __builtin_convertvector from (vector4BitInt128){0, 1, 2, 3} to each destination type
+    __builtin_convertvector((vector4BitInt128){0, 1, 2, 3}, vector4double);
+constexpr vector4float from_vector4BitInt128_to_vector4float_var =
+    __builtin_convertvector((vector4BitInt128){0, 1, 2, 3}, vector4float);
+constexpr vector4long from_vector4BitInt128_to_vector4long_var =
+    __builtin_convertvector((vector4BitInt128){0, 1, 2, 3}, vector4long);
+constexpr vector4int from_vector4BitInt128_to_vector4int_var =
+    __builtin_convertvector((vector4BitInt128){0, 1, 2, 3}, vector4int);
+constexpr vector4short from_vector4BitInt128_to_vector4short_var =
+    __builtin_convertvector((vector4BitInt128){0, 1, 2, 3}, vector4short);
+constexpr vector4char from_vector4BitInt128_to_vector4char_var =
+    __builtin_convertvector((vector4BitInt128){0, 1, 2, 3}, vector4char);
+constexpr vector4BitInt8 from_vector4BitInt128_to_vector4BitInt8_var =
+    __builtin_convertvector((vector4BitInt128){0, 1, 2, 3}, vector4BitInt8);
+constexpr vector4BitInt32 from_vector4BitInt128_to_vector4BitInt32_var =
+    __builtin_convertvector((vector4BitInt128){0, 1, 2, 3}, vector4BitInt32);
+constexpr vector4BitInt128 from_vector4BitInt128_to_vector4BitInt128_var =
+    __builtin_convertvector((vector4BitInt128){0, 1, 2, 3}, vector4BitInt128);
+static_assert(__builtin_bit_cast(unsigned, // shuffle indices 0..3 of the char result, viewed as a single unsigned
+                                 __builtin_shufflevector(
+                                     from_vector4BitInt128_to_vector4char_var,
+                                     from_vector4BitInt128_to_vector4char_var,
+                                     0, 1, 2, 3)) ==
+              (1 ? 0x03020100 : 0x00010203)); // condition is the literal 1, so 0x03020100 is always the value compared
+static_assert(__builtin_bit_cast(unsigned long long, // same check on the short result
+                                 __builtin_shufflevector(
+                                     from_vector4BitInt128_to_vector4short_var,
+                                     from_vector4BitInt128_to_vector4short_var,
+                                     0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003)); // first value puts element 0 in the least significant 16 bits
+; // lone empty declaration (presumably the semicolon that followed a macro invocation before expansion)
+constexpr vector8double from_vector8double_to_vector8double_var = // group: constant-evaluate __builtin_convertvector from (vector8double){0..7} to each destination type
+    __builtin_convertvector((vector8double){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8double);
+constexpr vector8float from_vector8double_to_vector8float_var =
+    __builtin_convertvector((vector8double){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8float);
+constexpr vector8long from_vector8double_to_vector8long_var =
+    __builtin_convertvector((vector8double){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8long);
+constexpr vector8int from_vector8double_to_vector8int_var =
+    __builtin_convertvector((vector8double){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8int);
+constexpr vector8short from_vector8double_to_vector8short_var =
+    __builtin_convertvector((vector8double){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8short);
+constexpr vector8char from_vector8double_to_vector8char_var =
+    __builtin_convertvector((vector8double){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8char);
+constexpr vector8BitInt8 from_vector8double_to_vector8BitInt8_var =
+    __builtin_convertvector((vector8double){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt8);
+constexpr vector8BitInt32 from_vector8double_to_vector8BitInt32_var =
+    __builtin_convertvector((vector8double){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt32);
+constexpr vector8BitInt128 from_vector8double_to_vector8BitInt128_var =
+    __builtin_convertvector((vector8double){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt128);
+static_assert(__builtin_bit_cast( // only indices 0..3 are shuffled out, so just the first four char elements are compared
+                  unsigned,
+                  __builtin_shufflevector(from_vector8double_to_vector8char_var,
+                                          from_vector8double_to_vector8char_var,
+                                          0, 1, 2, 3)) ==
+              (1 ? 0x03020100 : 0x00010203)); // condition is the literal 1, so 0x03020100 is always the value compared
+static_assert(__builtin_bit_cast(unsigned long long, // same check on the first four short elements
+                                 __builtin_shufflevector(
+                                     from_vector8double_to_vector8short_var,
+                                     from_vector8double_to_vector8short_var, 0,
+                                     1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003)); // first value puts element 0 in the least significant 16 bits
+constexpr vector8double from_vector8float_to_vector8double_var = // group: constant-evaluate __builtin_convertvector from (vector8float){0..7} to each destination type
+    __builtin_convertvector((vector8float){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8double);
+constexpr vector8float from_vector8float_to_vector8float_var =
+    __builtin_convertvector((vector8float){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8float);
+constexpr vector8long from_vector8float_to_vector8long_var =
+    __builtin_convertvector((vector8float){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8long);
+constexpr vector8int from_vector8float_to_vector8int_var =
+    __builtin_convertvector((vector8float){0, 1, 2, 3, 4, 5, 6, 7}, vector8int);
+constexpr vector8short from_vector8float_to_vector8short_var =
+    __builtin_convertvector((vector8float){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8short);
+constexpr vector8char from_vector8float_to_vector8char_var =
+    __builtin_convertvector((vector8float){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8char);
+constexpr vector8BitInt8 from_vector8float_to_vector8BitInt8_var =
+    __builtin_convertvector((vector8float){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt8);
+constexpr vector8BitInt32 from_vector8float_to_vector8BitInt32_var =
+    __builtin_convertvector((vector8float){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt32);
+constexpr vector8BitInt128 from_vector8float_to_vector8BitInt128_var =
+    __builtin_convertvector((vector8float){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt128);
+static_assert(__builtin_bit_cast(unsigned, // only indices 0..3 are shuffled out, so just the first four char elements are compared
+                                 __builtin_shufflevector(
+                                     from_vector8float_to_vector8char_var,
+                                     from_vector8float_to_vector8char_var, 0, 1,
+                                     2, 3)) == (1 ? 0x03020100 : 0x00010203)); // condition is the literal 1, so 0x03020100 is always the value compared
+static_assert(__builtin_bit_cast( // same check on the first four short elements
+                  unsigned long long,
+                  __builtin_shufflevector(from_vector8float_to_vector8short_var,
+                                          from_vector8float_to_vector8short_var,
+                                          0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003)); // first value puts element 0 in the least significant 16 bits
+constexpr vector8double from_vector8long_to_vector8double_var = // group: constant-evaluate __builtin_convertvector from (vector8long){0..7} to each destination type
+    __builtin_convertvector((vector8long){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8double);
+constexpr vector8float from_vector8long_to_vector8float_var =
+    __builtin_convertvector((vector8long){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8float);
+constexpr vector8long from_vector8long_to_vector8long_var =
+    __builtin_convertvector((vector8long){0, 1, 2, 3, 4, 5, 6, 7}, vector8long);
+constexpr vector8int from_vector8long_to_vector8int_var =
+    __builtin_convertvector((vector8long){0, 1, 2, 3, 4, 5, 6, 7}, vector8int);
+constexpr vector8short from_vector8long_to_vector8short_var =
+    __builtin_convertvector((vector8long){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8short);
+constexpr vector8char from_vector8long_to_vector8char_var =
+    __builtin_convertvector((vector8long){0, 1, 2, 3, 4, 5, 6, 7}, vector8char);
+constexpr vector8BitInt8 from_vector8long_to_vector8BitInt8_var =
+    __builtin_convertvector((vector8long){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt8);
+constexpr vector8BitInt32 from_vector8long_to_vector8BitInt32_var =
+    __builtin_convertvector((vector8long){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt32);
+constexpr vector8BitInt128 from_vector8long_to_vector8BitInt128_var =
+    __builtin_convertvector((vector8long){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt128);
+static_assert(__builtin_bit_cast(unsigned, // only indices 0..3 are shuffled out, so just the first four char elements are compared
+                                 __builtin_shufflevector(
+                                     from_vector8long_to_vector8char_var,
+                                     from_vector8long_to_vector8char_var, 0, 1,
+                                     2, 3)) == (1 ? 0x03020100 : 0x00010203)); // condition is the literal 1, so 0x03020100 is always the value compared
+static_assert(__builtin_bit_cast( // same check on the first four short elements
+                  unsigned long long,
+                  __builtin_shufflevector(from_vector8long_to_vector8short_var,
+                                          from_vector8long_to_vector8short_var,
+                                          0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003)); // first value puts element 0 in the least significant 16 bits
+constexpr vector8double from_vector8int_to_vector8double_var = // group: constant-evaluate __builtin_convertvector from (vector8int){0..7} to each destination type
+    __builtin_convertvector((vector8int){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8double);
+constexpr vector8float from_vector8int_to_vector8float_var =
+    __builtin_convertvector((vector8int){0, 1, 2, 3, 4, 5, 6, 7}, vector8float);
+constexpr vector8long from_vector8int_to_vector8long_var =
+    __builtin_convertvector((vector8int){0, 1, 2, 3, 4, 5, 6, 7}, vector8long);
+constexpr vector8int from_vector8int_to_vector8int_var =
+    __builtin_convertvector((vector8int){0, 1, 2, 3, 4, 5, 6, 7}, vector8int);
+constexpr vector8short from_vector8int_to_vector8short_var =
+    __builtin_convertvector((vector8int){0, 1, 2, 3, 4, 5, 6, 7}, vector8short);
+constexpr vector8char from_vector8int_to_vector8char_var =
+    __builtin_convertvector((vector8int){0, 1, 2, 3, 4, 5, 6, 7}, vector8char);
+constexpr vector8BitInt8 from_vector8int_to_vector8BitInt8_var =
+    __builtin_convertvector((vector8int){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt8);
+constexpr vector8BitInt32 from_vector8int_to_vector8BitInt32_var =
+    __builtin_convertvector((vector8int){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt32);
+constexpr vector8BitInt128 from_vector8int_to_vector8BitInt128_var =
+    __builtin_convertvector((vector8int){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt128);
+static_assert(__builtin_bit_cast(unsigned, // only indices 0..3 are shuffled out, so just the first four char elements are compared
+                                 __builtin_shufflevector(
+                                     from_vector8int_to_vector8char_var,
+                                     from_vector8int_to_vector8char_var, 0, 1,
+                                     2, 3)) == (1 ? 0x03020100 : 0x00010203)); // condition is the literal 1, so 0x03020100 is always the value compared
+static_assert(__builtin_bit_cast( // same check on the first four short elements
+                  unsigned long long,
+                  __builtin_shufflevector(from_vector8int_to_vector8short_var,
+                                          from_vector8int_to_vector8short_var,
+                                          0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003)); // first value puts element 0 in the least significant 16 bits
+constexpr vector8double from_vector8short_to_vector8double_var = // group: constant-evaluate __builtin_convertvector from (vector8short){0..7} to each destination type
+    __builtin_convertvector((vector8short){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8double);
+constexpr vector8float from_vector8short_to_vector8float_var =
+    __builtin_convertvector((vector8short){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8float);
+constexpr vector8long from_vector8short_to_vector8long_var =
+    __builtin_convertvector((vector8short){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8long);
+constexpr vector8int from_vector8short_to_vector8int_var =
+    __builtin_convertvector((vector8short){0, 1, 2, 3, 4, 5, 6, 7}, vector8int);
+constexpr vector8short from_vector8short_to_vector8short_var =
+    __builtin_convertvector((vector8short){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8short);
+constexpr vector8char from_vector8short_to_vector8char_var =
+    __builtin_convertvector((vector8short){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8char);
+constexpr vector8BitInt8 from_vector8short_to_vector8BitInt8_var =
+    __builtin_convertvector((vector8short){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt8);
+constexpr vector8BitInt32 from_vector8short_to_vector8BitInt32_var =
+    __builtin_convertvector((vector8short){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt32);
+constexpr vector8BitInt128 from_vector8short_to_vector8BitInt128_var =
+    __builtin_convertvector((vector8short){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt128);
+static_assert(__builtin_bit_cast(unsigned, // only indices 0..3 are shuffled out, so just the first four char elements are compared
+                                 __builtin_shufflevector(
+                                     from_vector8short_to_vector8char_var,
+                                     from_vector8short_to_vector8char_var, 0, 1,
+                                     2, 3)) == (1 ? 0x03020100 : 0x00010203)); // condition is the literal 1, so 0x03020100 is always the value compared
+static_assert(__builtin_bit_cast( // same check on the first four short elements
+                  unsigned long long,
+                  __builtin_shufflevector(from_vector8short_to_vector8short_var,
+                                          from_vector8short_to_vector8short_var,
+                                          0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003)); // first value puts element 0 in the least significant 16 bits
+constexpr vector8double from_vector8char_to_vector8double_var = // group: constant-evaluate __builtin_convertvector from (vector8char){0..7} to each destination type
+    __builtin_convertvector((vector8char){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8double);
+constexpr vector8float from_vector8char_to_vector8float_var =
+    __builtin_convertvector((vector8char){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8float);
+constexpr vector8long from_vector8char_to_vector8long_var =
+    __builtin_convertvector((vector8char){0, 1, 2, 3, 4, 5, 6, 7}, vector8long);
+constexpr vector8int from_vector8char_to_vector8int_var =
+    __builtin_convertvector((vector8char){0, 1, 2, 3, 4, 5, 6, 7}, vector8int);
+constexpr vector8short from_vector8char_to_vector8short_var =
+    __builtin_convertvector((vector8char){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8short);
+constexpr vector8char from_vector8char_to_vector8char_var =
+    __builtin_convertvector((vector8char){0, 1, 2, 3, 4, 5, 6, 7}, vector8char);
+constexpr vector8BitInt8 from_vector8char_to_vector8BitInt8_var =
+    __builtin_convertvector((vector8char){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt8);
+constexpr vector8BitInt32 from_vector8char_to_vector8BitInt32_var =
+    __builtin_convertvector((vector8char){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt32);
+constexpr vector8BitInt128 from_vector8char_to_vector8BitInt128_var =
+    __builtin_convertvector((vector8char){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt128);
+static_assert(__builtin_bit_cast(unsigned, // only indices 0..3 are shuffled out, so just the first four char elements are compared
+                                 __builtin_shufflevector(
+                                     from_vector8char_to_vector8char_var,
+                                     from_vector8char_to_vector8char_var, 0, 1,
+                                     2, 3)) == (1 ? 0x03020100 : 0x00010203)); // condition is the literal 1, so 0x03020100 is always the value compared
+static_assert(__builtin_bit_cast( // same check on the first four short elements
+                  unsigned long long,
+                  __builtin_shufflevector(from_vector8char_to_vector8short_var,
+                                          from_vector8char_to_vector8short_var,
+                                          0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003)); // first value puts element 0 in the least significant 16 bits
+constexpr vector8double from_vector8BitInt8_to_vector8double_var = // group: constant-evaluate __builtin_convertvector from (vector8BitInt8){0..7} to each destination type
+    __builtin_convertvector((vector8BitInt8){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8double);
+constexpr vector8float from_vector8BitInt8_to_vector8float_var =
+    __builtin_convertvector((vector8BitInt8){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8float);
+constexpr vector8long from_vector8BitInt8_to_vector8long_var =
+    __builtin_convertvector((vector8BitInt8){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8long);
+constexpr vector8int from_vector8BitInt8_to_vector8int_var =
+    __builtin_convertvector((vector8BitInt8){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8int);
+constexpr vector8short from_vector8BitInt8_to_vector8short_var =
+    __builtin_convertvector((vector8BitInt8){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8short);
+constexpr vector8char from_vector8BitInt8_to_vector8char_var =
+    __builtin_convertvector((vector8BitInt8){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8char);
+constexpr vector8BitInt8 from_vector8BitInt8_to_vector8BitInt8_var =
+    __builtin_convertvector((vector8BitInt8){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt8);
+constexpr vector8BitInt32 from_vector8BitInt8_to_vector8BitInt32_var =
+    __builtin_convertvector((vector8BitInt8){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt32);
+constexpr vector8BitInt128 from_vector8BitInt8_to_vector8BitInt128_var =
+    __builtin_convertvector((vector8BitInt8){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt128);
+static_assert(__builtin_bit_cast(unsigned, // only indices 0..3 are shuffled out, so just the first four char elements are compared
+                                 __builtin_shufflevector(
+                                     from_vector8BitInt8_to_vector8char_var,
+                                     from_vector8BitInt8_to_vector8char_var, 0,
+                                     1, 2, 3)) ==
+              (1 ? 0x03020100 : 0x00010203)); // condition is the literal 1, so 0x03020100 is always the value compared
+static_assert(__builtin_bit_cast(unsigned long long, // same check on the first four short elements
+                                 __builtin_shufflevector(
+                                     from_vector8BitInt8_to_vector8short_var,
+                                     from_vector8BitInt8_to_vector8short_var, 0,
+                                     1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003)); // first value puts element 0 in the least significant 16 bits
+constexpr vector8double from_vector8BitInt32_to_vector8double_var = // group: constant-evaluate __builtin_convertvector from (vector8BitInt32){0..7} to each destination type
+    __builtin_convertvector((vector8BitInt32){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8double);
+constexpr vector8float from_vector8BitInt32_to_vector8float_var =
+    __builtin_convertvector((vector8BitInt32){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8float);
+constexpr vector8long from_vector8BitInt32_to_vector8long_var =
+    __builtin_convertvector((vector8BitInt32){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8long);
+constexpr vector8int from_vector8BitInt32_to_vector8int_var =
+    __builtin_convertvector((vector8BitInt32){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8int);
+constexpr vector8short from_vector8BitInt32_to_vector8short_var =
+    __builtin_convertvector((vector8BitInt32){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8short);
+constexpr vector8char from_vector8BitInt32_to_vector8char_var =
+    __builtin_convertvector((vector8BitInt32){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8char);
+constexpr vector8BitInt8 from_vector8BitInt32_to_vector8BitInt8_var =
+    __builtin_convertvector((vector8BitInt32){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt8);
+constexpr vector8BitInt32 from_vector8BitInt32_to_vector8BitInt32_var =
+    __builtin_convertvector((vector8BitInt32){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt32);
+constexpr vector8BitInt128 from_vector8BitInt32_to_vector8BitInt128_var =
+    __builtin_convertvector((vector8BitInt32){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt128);
+static_assert(__builtin_bit_cast(unsigned, // only indices 0..3 are shuffled out, so just the first four char elements are compared
+                                 __builtin_shufflevector(
+                                     from_vector8BitInt32_to_vector8char_var,
+                                     from_vector8BitInt32_to_vector8char_var, 0,
+                                     1, 2, 3)) ==
+              (1 ? 0x03020100 : 0x00010203)); // condition is the literal 1, so 0x03020100 is always the value compared
+static_assert(__builtin_bit_cast(unsigned long long, // same check on the first four short elements
+                                 __builtin_shufflevector(
+                                     from_vector8BitInt32_to_vector8short_var,
+                                     from_vector8BitInt32_to_vector8short_var,
+                                     0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003)); // first value puts element 0 in the least significant 16 bits
+constexpr vector8double from_vector8BitInt128_to_vector8double_var = // group: constant-evaluate __builtin_convertvector from (vector8BitInt128){0..7} to each destination type
+    __builtin_convertvector((vector8BitInt128){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8double);
+constexpr vector8float from_vector8BitInt128_to_vector8float_var =
+    __builtin_convertvector((vector8BitInt128){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8float);
+constexpr vector8long from_vector8BitInt128_to_vector8long_var =
+    __builtin_convertvector((vector8BitInt128){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8long);
+constexpr vector8int from_vector8BitInt128_to_vector8int_var =
+    __builtin_convertvector((vector8BitInt128){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8int);
+constexpr vector8short from_vector8BitInt128_to_vector8short_var =
+    __builtin_convertvector((vector8BitInt128){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8short);
+constexpr vector8char from_vector8BitInt128_to_vector8char_var =
+    __builtin_convertvector((vector8BitInt128){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8char);
+constexpr vector8BitInt8 from_vector8BitInt128_to_vector8BitInt8_var =
+    __builtin_convertvector((vector8BitInt128){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt8);
+constexpr vector8BitInt32 from_vector8BitInt128_to_vector8BitInt32_var =
+    __builtin_convertvector((vector8BitInt128){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt32);
+constexpr vector8BitInt128 from_vector8BitInt128_to_vector8BitInt128_var =
+    __builtin_convertvector((vector8BitInt128){0, 1, 2, 3, 4, 5, 6, 7},
+                            vector8BitInt128);
+static_assert(__builtin_bit_cast(unsigned, // only indices 0..3 are shuffled out, so just the first four char elements are compared
+                                 __builtin_shufflevector(
+                                     from_vector8BitInt128_to_vector8char_var,
+                                     from_vector8BitInt128_to_vector8char_var,
+                                     0, 1, 2, 3)) ==
+              (1 ? 0x03020100 : 0x00010203)); // condition is the literal 1, so 0x03020100 is always the value compared
+static_assert(__builtin_bit_cast(unsigned long long, // same check on the first four short elements
+                                 __builtin_shufflevector(
+                                     from_vector8BitInt128_to_vector8short_var,
+                                     from_vector8BitInt128_to_vector8short_var,
+                                     0, 1, 2, 3)) ==
+              (1 ? 0x0003000200010000 : 0x0000000100020003)); // first value puts element 0 in the least significant 16 bits
+; // lone empty declaration (presumably the semicolon that followed a macro invocation before expansion)
 #undef CHECK_ALL_COMBINATIONS
 #undef CHECK_TO_ALL_TYPES
 #undef CHECK_NUM
@@ -113,8 +716,8 @@ static_assert(__builtin_bit_cast(unsigned long long, vectorShuffle6) ==
 constexpr vector4char
     vectorShuffleFail1 = // expected-error {{constexpr variable 'vectorShuffleFail1'\
  must be initialized by a constant expression}}
-    __builtin_shufflevector( // expected-error {{index for __builtin_shufflevector must be within\
- the bounds of the input vectors in a constexpr context. An index of\
- -1 at position 0 was found. -1 is only allowed at runtime.}}
+    __builtin_shufflevector( // expected-error {{index for __builtin_shufflevector \
+not within the bounds of the input vectors; index of -1 found at position 0 not \
+permitted in a constexpr context.}}
         vector4charConst1,
         vector4charConst2, -1, -1, -1, -1);