[llvm] [InstCombine/RISCV] Constant-fold vmv.v.x (PR #182630)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 21 03:34:31 PST 2026


https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/182630

>From 2a46639bfab97bb71f671d2c679f92d845bcd82b Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Fri, 20 Feb 2026 23:04:41 +0000
Subject: [PATCH 1/5] [InstCombine] Pre-commit test for riscv-vmv-v-x

---
 .../InstCombine/RISCV/riscv-vmv-v-x.ll        | 100 ++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll

diff --git a/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll b/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
new file mode 100644
index 0000000000000..914957dfce350
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -p instcombine -mtriple=riscv32 -mattr=+v -S %s | FileCheck %s
+; RUN: opt -p instcombine -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s
+
+define <8 x i8> @fixed() {
+; CHECK-LABEL: define <8 x i8> @fixed(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 85, i64 4)
+; CHECK-NEXT:    ret <8 x i8> [[A]]
+;
+  %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 85, i64 4)
+  ret <8 x i8> %a
+}
+
+define <vscale x 8 x i8> @scalable() {
+; CHECK-LABEL: define <vscale x 8 x i8> @scalable(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[A:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 85, i64 4)
+; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
+;
+  %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 85, i64 4)
+  ret <vscale x 8 x i8> %a
+}
+
+define <8 x i8> @small_scalar() {
+; CHECK-LABEL: define <8 x i8> @small_scalar(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 3, i64 4)
+; CHECK-NEXT:    ret <8 x i8> [[A]]
+;
+  %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 3, i64 4)
+  ret <8 x i8> %a
+}
+
+define <64 x i1> @users_with_bitcast() {
+; CHECK-LABEL: define <64 x i1> @users_with_bitcast(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[VMV_1:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 85, i64 4)
+; CHECK-NEXT:    [[VMV_2:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 -86, i64 4)
+; CHECK-NEXT:    [[RET1:%.*]] = xor <8 x i8> [[VMV_1]], [[VMV_2]]
+; CHECK-NEXT:    [[RET:%.*]] = bitcast <8 x i8> [[RET1]] to <64 x i1>
+; CHECK-NEXT:    ret <64 x i1> [[RET]]
+;
+  %vmv.1 = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 85, i64 4)
+  %cast.1 = bitcast <8 x i8> %vmv.1 to <64 x i1>
+  %vmv.2 = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 -86, i64 4)
+  %cast.2 = bitcast <8 x i8> %vmv.2 to <64 x i1>
+  %ret = xor <64 x i1> %cast.1, %cast.2
+  ret <64 x i1> %ret
+}
+
+define <8 x i8> @passthru_non_poison(<8 x i8> %x) {
+; CHECK-LABEL: define <8 x i8> @passthru_non_poison(
+; CHECK-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> [[X]], i8 85, i64 4)
+; CHECK-NEXT:    ret <8 x i8> [[A]]
+;
+  %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> %x, i8 85, i64 4)
+  ret <8 x i8> %a
+}
+
+define <8 x i8> @scalar_non_constant(i8 %scalar) {
+; CHECK-LABEL: define <8 x i8> @scalar_non_constant(
+; CHECK-SAME: i8 [[SCALAR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 [[SCALAR]], i64 4)
+; CHECK-NEXT:    ret <8 x i8> [[A]]
+;
+  %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 %scalar, i64 4)
+  ret <8 x i8> %a
+}
+
+define <8 x i8> @vl_non_constant(i64 %vl) {
+; CHECK-LABEL: define <8 x i8> @vl_non_constant(
+; CHECK-SAME: i64 [[VL:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 85, i64 [[VL]])
+; CHECK-NEXT:    ret <8 x i8> [[A]]
+;
+  %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 85, i64 %vl)
+  ret <8 x i8> %a
+}
+
+define <1 x i128> @scalar_operand_too_large() {
+; CHECK-LABEL: define <1 x i128> @scalar_operand_too_large(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[A:%.*]] = call <1 x i128> @llvm.riscv.vmv.v.x.v1i128.i64(<1 x i128> poison, i128 85, i64 4)
+; CHECK-NEXT:    ret <1 x i128> [[A]]
+;
+  %a = call <1 x i128> @llvm.riscv.vmv.v.x.v8i8(<1 x i128> poison, i128 85, i64 4)
+  ret <1 x i128> %a
+}
+
+define <8 x i8> @vl_too_large() {
+; CHECK-LABEL: define <8 x i8> @vl_too_large(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 85, i64 128)
+; CHECK-NEXT:    ret <8 x i8> [[A]]
+;
+  %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 85, i64 128)
+  ret <8 x i8> %a
+}

>From a737342df0dd16365475e6ea6b78b5e1e093fa28 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Fri, 20 Feb 2026 23:26:18 +0000
Subject: [PATCH 2/5] [InstCombine] Constant-fold riscv.vmv.v.x

The motivating example is: https://godbolt.org/z/vnb3ETsbc

There is an issue with extra vsetvli instructions: RISCVInsertVSETVLI
queries which instructions demand VL, and optimizes based on that. Due
to the non-unit VL in vmv.v.x, we wastefully insert extra vsetvli
instructions even when all of its operands are constant. To avoid this,
constant-fold vmv.v.x into an equivalent form with unit VL, using a
bitcast to handle the type conversion.

llc run showing vsetvli eliminated: https://godbolt.org/z/KEPTxTPcb
---
 .../InstCombine/InstCombineCalls.cpp          | 41 +++++++++++++++++++
 .../InstCombine/RISCV/riscv-vmv-v-x.ll        | 17 ++++----
 2 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 6476a38b8a545..f7b2ac2ca96c8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -48,6 +48,7 @@
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/PatternMatch.h"
@@ -4262,6 +4263,46 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
           *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
     return nullptr;
   }
+  case Intrinsic::riscv_vmv_v_x: {
+    // If all operands are constant, constant-fold with bitcast. The rationale
+    // for this is to optimize the number of inserted vsetivli instructions, by
+    // RISCVInsertVSETVLI.
+    const APInt *Scalar, *VL;
+    if (!match(II, m_Intrinsic<Intrinsic::riscv_vmv_v_x>(
+                       m_Poison(), m_APInt(Scalar), m_APInt(VL))) ||
+        VL->isOne() || Scalar->getBitWidth() > VL->getBitWidth())
+      return nullptr;
+    auto *VecTy = cast<VectorType>(II->getType());
+    bool IsScalable = VecTy->isScalableTy();
+    ElementCount EC = VecTy->getElementCount();
+    ElementCount ScaleFactor =
+        ElementCount::get(VL->getZExtValue(), IsScalable);
+    auto *EltTy = cast<IntegerType>(VecTy->getScalarType());
+    auto *NewEltTy = IntegerType::get(
+        CI.getContext(), EltTy->getScalarSizeInBits() * VL->getZExtValue());
+    if (!EC.hasKnownScalarFactor(ScaleFactor) ||
+        NewEltTy->getBitWidth() > VL->getBitWidth())
+      return nullptr;
+    ElementCount NewEC =
+        ElementCount::get(EC.getKnownScalarFactor(ScaleFactor), IsScalable);
+    Type *RetTy = VectorType::get(NewEltTy, NewEC);
+    assert(VecTy->canLosslesslyBitCastTo(RetTy) &&
+           "Lossless bitcast between types expected");
+    APInt ScalarExt = Scalar->abs().zext(NewEltTy->getBitWidth());
+    APInt NewScalar(ScalarExt.getBitWidth(), 0);
+    for (unsigned Idx : seq(VL->getZExtValue()))
+      NewScalar |= ScalarExt << Scalar->getBitWidth() * Idx;
+    if (Scalar->isSignBitSet())
+      NewScalar.setSignBit();
+    return replaceInstUsesWith(
+        *II,
+        Builder.CreateBitCast(
+            Builder.CreateIntrinsic(
+                RetTy, Intrinsic::riscv_vmv_v_x,
+                {PoisonValue::get(RetTy), ConstantInt::get(NewEltTy, NewScalar),
+                 ConstantInt::get(II->getOperand(2)->getType(), 1)}),
+            VecTy));
+  }
   default: {
     // Handle target specific intrinsics
     std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
diff --git a/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll b/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
index 914957dfce350..cc8afb5f4b89e 100644
--- a/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
+++ b/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
@@ -5,7 +5,8 @@
 define <8 x i8> @fixed() {
 ; CHECK-LABEL: define <8 x i8> @fixed(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 85, i64 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.riscv.vmv.v.x.v2i32.i64(<2 x i32> poison, i32 1431655765, i64 1)
+; CHECK-NEXT:    [[A:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
 ; CHECK-NEXT:    ret <8 x i8> [[A]]
 ;
   %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 85, i64 4)
@@ -15,7 +16,8 @@ define <8 x i8> @fixed() {
 define <vscale x 8 x i8> @scalable() {
 ; CHECK-LABEL: define <vscale x 8 x i8> @scalable(
 ; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT:    [[A:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 85, i64 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmv.v.x.nxv2i32.i64(<vscale x 2 x i32> poison, i32 1431655765, i64 1)
+; CHECK-NEXT:    [[A:%.*]] = bitcast <vscale x 2 x i32> [[TMP1]] to <vscale x 8 x i8>
 ; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
 ;
   %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 85, i64 4)
@@ -25,7 +27,8 @@ define <vscale x 8 x i8> @scalable() {
 define <8 x i8> @small_scalar() {
 ; CHECK-LABEL: define <8 x i8> @small_scalar(
 ; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 3, i64 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.riscv.vmv.v.x.v2i32.i64(<2 x i32> poison, i32 50529027, i64 1)
+; CHECK-NEXT:    [[A:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
 ; CHECK-NEXT:    ret <8 x i8> [[A]]
 ;
   %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 3, i64 4)
@@ -35,10 +38,10 @@ define <8 x i8> @small_scalar() {
 define <64 x i1> @users_with_bitcast() {
 ; CHECK-LABEL: define <64 x i1> @users_with_bitcast(
 ; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT:    [[VMV_1:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 85, i64 4)
-; CHECK-NEXT:    [[VMV_2:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 -86, i64 4)
-; CHECK-NEXT:    [[RET1:%.*]] = xor <8 x i8> [[VMV_1]], [[VMV_2]]
-; CHECK-NEXT:    [[RET:%.*]] = bitcast <8 x i8> [[RET1]] to <64 x i1>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.riscv.vmv.v.x.v2i32.i64(<2 x i32> poison, i32 1431655765, i64 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.vmv.v.x.v2i32.i64(<2 x i32> poison, i32 -698984874, i64 1)
+; CHECK-NEXT:    [[RET1:%.*]] = xor <2 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[RET:%.*]] = bitcast <2 x i32> [[RET1]] to <64 x i1>
 ; CHECK-NEXT:    ret <64 x i1> [[RET]]
 ;
   %vmv.1 = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 85, i64 4)

>From 4a544a71240c4ff19dd4bcbe6a423a0388fe7434 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Sat, 21 Feb 2026 00:44:42 +0000
Subject: [PATCH 3/5] [InstCombine] Strip degenerate fixed-vector case

---
 .../InstCombine/InstCombineCalls.cpp          |   8 +-
 .../InstCombine/RISCV/riscv-vmv-v-x.ll        | 115 ++++++++----------
 2 files changed, 55 insertions(+), 68 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f7b2ac2ca96c8..2b99a2057101f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -4272,11 +4272,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
                        m_Poison(), m_APInt(Scalar), m_APInt(VL))) ||
         VL->isOne() || Scalar->getBitWidth() > VL->getBitWidth())
       return nullptr;
-    auto *VecTy = cast<VectorType>(II->getType());
-    bool IsScalable = VecTy->isScalableTy();
+    auto *VecTy = cast<ScalableVectorType>(II->getType());
     ElementCount EC = VecTy->getElementCount();
-    ElementCount ScaleFactor =
-        ElementCount::get(VL->getZExtValue(), IsScalable);
+    ElementCount ScaleFactor = ElementCount::getScalable(VL->getZExtValue());
     auto *EltTy = cast<IntegerType>(VecTy->getScalarType());
     auto *NewEltTy = IntegerType::get(
         CI.getContext(), EltTy->getScalarSizeInBits() * VL->getZExtValue());
@@ -4284,7 +4282,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         NewEltTy->getBitWidth() > VL->getBitWidth())
       return nullptr;
     ElementCount NewEC =
-        ElementCount::get(EC.getKnownScalarFactor(ScaleFactor), IsScalable);
+        ElementCount::getScalable(EC.getKnownScalarFactor(ScaleFactor));
     Type *RetTy = VectorType::get(NewEltTy, NewEC);
     assert(VecTy->canLosslesslyBitCastTo(RetTy) &&
            "Lossless bitcast between types expected");
diff --git a/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll b/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
index cc8afb5f4b89e..0e27bebade036 100644
--- a/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
+++ b/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
@@ -2,20 +2,9 @@
 ; RUN: opt -p instcombine -mtriple=riscv32 -mattr=+v -S %s | FileCheck %s
 ; RUN: opt -p instcombine -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s
 
-define <8 x i8> @fixed() {
-; CHECK-LABEL: define <8 x i8> @fixed(
-; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.riscv.vmv.v.x.v2i32.i64(<2 x i32> poison, i32 1431655765, i64 1)
-; CHECK-NEXT:    [[A:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
-; CHECK-NEXT:    ret <8 x i8> [[A]]
-;
-  %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 85, i64 4)
-  ret <8 x i8> %a
-}
-
 define <vscale x 8 x i8> @scalable() {
 ; CHECK-LABEL: define <vscale x 8 x i8> @scalable(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmv.v.x.nxv2i32.i64(<vscale x 2 x i32> poison, i32 1431655765, i64 1)
 ; CHECK-NEXT:    [[A:%.*]] = bitcast <vscale x 2 x i32> [[TMP1]] to <vscale x 8 x i8>
 ; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
@@ -24,80 +13,80 @@ define <vscale x 8 x i8> @scalable() {
   ret <vscale x 8 x i8> %a
 }
 
-define <8 x i8> @small_scalar() {
-; CHECK-LABEL: define <8 x i8> @small_scalar(
+define <vscale x 8 x i8> @small_scalar() {
+; CHECK-LABEL: define <vscale x 8 x i8> @small_scalar(
 ; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.riscv.vmv.v.x.v2i32.i64(<2 x i32> poison, i32 50529027, i64 1)
-; CHECK-NEXT:    [[A:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
-; CHECK-NEXT:    ret <8 x i8> [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmv.v.x.nxv2i32.i64(<vscale x 2 x i32> poison, i32 50529027, i64 1)
+; CHECK-NEXT:    [[A:%.*]] = bitcast <vscale x 2 x i32> [[TMP1]] to <vscale x 8 x i8>
+; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
 ;
-  %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 3, i64 4)
-  ret <8 x i8> %a
+  %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 3, i64 4)
+  ret <vscale x 8 x i8> %a
 }
 
-define <64 x i1> @users_with_bitcast() {
-; CHECK-LABEL: define <64 x i1> @users_with_bitcast(
+define <vscale x 64 x i1> @users_with_bitcast() {
+; CHECK-LABEL: define <vscale x 64 x i1> @users_with_bitcast(
 ; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.riscv.vmv.v.x.v2i32.i64(<2 x i32> poison, i32 1431655765, i64 1)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.vmv.v.x.v2i32.i64(<2 x i32> poison, i32 -698984874, i64 1)
-; CHECK-NEXT:    [[RET1:%.*]] = xor <2 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[RET:%.*]] = bitcast <2 x i32> [[RET1]] to <64 x i1>
-; CHECK-NEXT:    ret <64 x i1> [[RET]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmv.v.x.nxv2i32.i64(<vscale x 2 x i32> poison, i32 1431655765, i64 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmv.v.x.nxv2i32.i64(<vscale x 2 x i32> poison, i32 -698984874, i64 1)
+; CHECK-NEXT:    [[RET1:%.*]] = xor <vscale x 2 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[RET:%.*]] = bitcast <vscale x 2 x i32> [[RET1]] to <vscale x 64 x i1>
+; CHECK-NEXT:    ret <vscale x 64 x i1> [[RET]]
 ;
-  %vmv.1 = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 85, i64 4)
-  %cast.1 = bitcast <8 x i8> %vmv.1 to <64 x i1>
-  %vmv.2 = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 -86, i64 4)
-  %cast.2 = bitcast <8 x i8> %vmv.2 to <64 x i1>
-  %ret = xor <64 x i1> %cast.1, %cast.2
-  ret <64 x i1> %ret
+  %vmv.1 = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 85, i64 4)
+  %cast.1 = bitcast <vscale x 8 x i8> %vmv.1 to <vscale x 64 x i1>
+  %vmv.2 = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 -86, i64 4)
+  %cast.2 = bitcast <vscale x 8 x i8> %vmv.2 to <vscale x 64 x i1>
+  %ret = xor <vscale x 64 x i1> %cast.1, %cast.2
+  ret <vscale x 64 x i1> %ret
 }
 
-define <8 x i8> @passthru_non_poison(<8 x i8> %x) {
-; CHECK-LABEL: define <8 x i8> @passthru_non_poison(
-; CHECK-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> [[X]], i8 85, i64 4)
-; CHECK-NEXT:    ret <8 x i8> [[A]]
+define <vscale x 8 x i8> @passthru_non_poison(<vscale x 8 x i8> %x) {
+; CHECK-LABEL: define <vscale x 8 x i8> @passthru_non_poison(
+; CHECK-SAME: <vscale x 8 x i8> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[A:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> [[X]], i8 85, i64 4)
+; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
 ;
-  %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> %x, i8 85, i64 4)
-  ret <8 x i8> %a
+  %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> %x, i8 85, i64 4)
+  ret <vscale x 8 x i8> %a
 }
 
-define <8 x i8> @scalar_non_constant(i8 %scalar) {
-; CHECK-LABEL: define <8 x i8> @scalar_non_constant(
+define <vscale x 8 x i8> @scalar_non_constant(i8 %scalar) {
+; CHECK-LABEL: define <vscale x 8 x i8> @scalar_non_constant(
 ; CHECK-SAME: i8 [[SCALAR:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 [[SCALAR]], i64 4)
-; CHECK-NEXT:    ret <8 x i8> [[A]]
+; CHECK-NEXT:    [[A:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 [[SCALAR]], i64 4)
+; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
 ;
-  %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 %scalar, i64 4)
-  ret <8 x i8> %a
+  %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 %scalar, i64 4)
+  ret <vscale x 8 x i8> %a
 }
 
-define <8 x i8> @vl_non_constant(i64 %vl) {
-; CHECK-LABEL: define <8 x i8> @vl_non_constant(
+define <vscale x 8 x i8> @vl_non_constant(i64 %vl) {
+; CHECK-LABEL: define <vscale x 8 x i8> @vl_non_constant(
 ; CHECK-SAME: i64 [[VL:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 85, i64 [[VL]])
-; CHECK-NEXT:    ret <8 x i8> [[A]]
+; CHECK-NEXT:    [[A:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 85, i64 [[VL]])
+; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
 ;
-  %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 85, i64 %vl)
-  ret <8 x i8> %a
+  %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 85, i64 %vl)
+  ret <vscale x 8 x i8> %a
 }
 
-define <1 x i128> @scalar_operand_too_large() {
-; CHECK-LABEL: define <1 x i128> @scalar_operand_too_large(
+define <vscale x 1 x i128> @scalar_operand_too_large() {
+; CHECK-LABEL: define <vscale x 1 x i128> @scalar_operand_too_large(
 ; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT:    [[A:%.*]] = call <1 x i128> @llvm.riscv.vmv.v.x.v1i128.i64(<1 x i128> poison, i128 85, i64 4)
-; CHECK-NEXT:    ret <1 x i128> [[A]]
+; CHECK-NEXT:    [[A:%.*]] = call <vscale x 1 x i128> @llvm.riscv.vmv.v.x.nxv1i128.i64(<vscale x 1 x i128> poison, i128 85, i64 4)
+; CHECK-NEXT:    ret <vscale x 1 x i128> [[A]]
 ;
-  %a = call <1 x i128> @llvm.riscv.vmv.v.x.v8i8(<1 x i128> poison, i128 85, i64 4)
-  ret <1 x i128> %a
+  %a = call <vscale x 1 x i128> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 1 x i128> poison, i128 85, i64 4)
+  ret <vscale x 1 x i128> %a
 }
 
-define <8 x i8> @vl_too_large() {
-; CHECK-LABEL: define <8 x i8> @vl_too_large(
+define <vscale x 8 x i8> @vl_too_large() {
+; CHECK-LABEL: define <vscale x 8 x i8> @vl_too_large(
 ; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8.i64(<8 x i8> poison, i8 85, i64 128)
-; CHECK-NEXT:    ret <8 x i8> [[A]]
+; CHECK-NEXT:    [[A:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 85, i64 128)
+; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
 ;
-  %a = call <8 x i8> @llvm.riscv.vmv.v.x.v8i8(<8 x i8> poison, i8 85, i64 128)
-  ret <8 x i8> %a
+  %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 85, i64 128)
+  ret <vscale x 8 x i8> %a
 }

>From f90570fe4915544bbd713a25943e28b2aa0bd56b Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Sat, 21 Feb 2026 10:51:23 +0000
Subject: [PATCH 4/5] [InstCombine/RISCV] Address reviews

---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |  40 +++++
 .../Target/RISCV/RISCVTargetTransformInfo.h   |   3 +
 .../InstCombine/InstCombineCalls.cpp          |  39 -----
 .../InstCombine/RISCV/riscv-vmv-v-x.ll        | 140 +++++++++++++-----
 4 files changed, 148 insertions(+), 74 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index a40d77dd734c8..fa09478530fc4 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -17,6 +17,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicsRISCV.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
 #include <cmath>
 #include <optional>
 using namespace llvm;
@@ -3558,3 +3559,42 @@ bool RISCVTTIImpl::shouldCopyAttributeWhenOutliningFrom(
 
   return BaseT::shouldCopyAttributeWhenOutliningFrom(Caller, Attr);
 }
+
+std::optional<Instruction *>
+RISCVTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
+  // If all operands are constant, constant-fold with bitcast: make sure that
+  // all users are bitcasts, to avoid introducing new bitcasts. The rationale
+  // for this is to optimize the number of inserted vsetvli instructions, by
+  // RISCVInsertVSETVLI.
+  const APInt *Scalar;
+  uint64_t VL;
+  if (!match(&II, m_Intrinsic<Intrinsic::riscv_vmv_v_x>(
+                      m_Poison(), m_APInt(Scalar), m_ConstantInt(VL))) ||
+      VL == 1 || Scalar->getBitWidth() > ST->getXLen() ||
+      !all_of(II.users(), match_fn(m_BitCast(m_Value()))))
+    return {};
+  auto *VecTy = cast<ScalableVectorType>(II.getType());
+  ElementCount EC = VecTy->getElementCount();
+  ElementCount ScaleFactor = ElementCount::getScalable(VL);
+  auto *EltTy = cast<IntegerType>(VecTy->getElementType());
+  unsigned NewEltBW = EltTy->getScalarSizeInBits() * VL;
+  if (!EC.hasKnownScalarFactor(ScaleFactor) || NewEltBW > ST->getXLen())
+    return {};
+  auto *NewEltTy = IntegerType::get(II.getContext(), NewEltBW);
+  if (!TLI->isLegalElementTypeForRVV(TLI->getValueType(DL, NewEltTy)))
+    return {};
+  ElementCount NewEC =
+      ElementCount::getScalable(EC.getKnownScalarFactor(ScaleFactor));
+  Type *RetTy = VectorType::get(NewEltTy, NewEC);
+  assert(VecTy->canLosslesslyBitCastTo(RetTy) &&
+         "Lossless bitcast between types expected");
+  APInt NewScalar = APInt::getSplat(NewEltBW, *Scalar);
+  Type *VLTy = II.getOperand(2)->getType();
+  return IC.replaceInstUsesWith(
+      II, IC.Builder.CreateBitCast(
+              IC.Builder.CreateIntrinsic(RetTy, Intrinsic::riscv_vmv_v_x,
+                                         {PoisonValue::get(RetTy),
+                                          ConstantInt::get(NewEltTy, NewScalar),
+                                          ConstantInt::get(VLTy, 1)}),
+              VecTy));
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index cee41b1422b85..e78200696e9c8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -511,6 +511,9 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
   bool
   shouldCopyAttributeWhenOutliningFrom(const Function *Caller,
                                        const Attribute &Attr) const override;
+
+  std::optional<Instruction *>
+  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 2b99a2057101f..6476a38b8a545 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -48,7 +48,6 @@
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/IntrinsicsHexagon.h"
-#include "llvm/IR/IntrinsicsRISCV.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/PatternMatch.h"
@@ -4263,44 +4262,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
           *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
     return nullptr;
   }
-  case Intrinsic::riscv_vmv_v_x: {
-    // If all operands are constant, constant-fold with bitcast. The rationale
-    // for this is to optimize the number of inserted vsetivli instructions, by
-    // RISCVInsertVSETVLI.
-    const APInt *Scalar, *VL;
-    if (!match(II, m_Intrinsic<Intrinsic::riscv_vmv_v_x>(
-                       m_Poison(), m_APInt(Scalar), m_APInt(VL))) ||
-        VL->isOne() || Scalar->getBitWidth() > VL->getBitWidth())
-      return nullptr;
-    auto *VecTy = cast<ScalableVectorType>(II->getType());
-    ElementCount EC = VecTy->getElementCount();
-    ElementCount ScaleFactor = ElementCount::getScalable(VL->getZExtValue());
-    auto *EltTy = cast<IntegerType>(VecTy->getScalarType());
-    auto *NewEltTy = IntegerType::get(
-        CI.getContext(), EltTy->getScalarSizeInBits() * VL->getZExtValue());
-    if (!EC.hasKnownScalarFactor(ScaleFactor) ||
-        NewEltTy->getBitWidth() > VL->getBitWidth())
-      return nullptr;
-    ElementCount NewEC =
-        ElementCount::getScalable(EC.getKnownScalarFactor(ScaleFactor));
-    Type *RetTy = VectorType::get(NewEltTy, NewEC);
-    assert(VecTy->canLosslesslyBitCastTo(RetTy) &&
-           "Lossless bitcast between types expected");
-    APInt ScalarExt = Scalar->abs().zext(NewEltTy->getBitWidth());
-    APInt NewScalar(ScalarExt.getBitWidth(), 0);
-    for (unsigned Idx : seq(VL->getZExtValue()))
-      NewScalar |= ScalarExt << Scalar->getBitWidth() * Idx;
-    if (Scalar->isSignBitSet())
-      NewScalar.setSignBit();
-    return replaceInstUsesWith(
-        *II,
-        Builder.CreateBitCast(
-            Builder.CreateIntrinsic(
-                RetTy, Intrinsic::riscv_vmv_v_x,
-                {PoisonValue::get(RetTy), ConstantInt::get(NewEltTy, NewScalar),
-                 ConstantInt::get(II->getOperand(2)->getType(), 1)}),
-            VecTy));
-  }
   default: {
     // Handle target specific intrinsics
     std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
diff --git a/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll b/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
index 0e27bebade036..3af574277d745 100644
--- a/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
+++ b/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
@@ -1,34 +1,49 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt -p instcombine -mtriple=riscv32 -mattr=+v -S %s | FileCheck %s
-; RUN: opt -p instcombine -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s
+; RUN: opt -p instcombine -mtriple=riscv32 -mattr=+v -S %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: opt -p instcombine -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: opt -p instcombine -mtriple=riscv32 -mattr=+zve32x -S %s | FileCheck %s --check-prefixes=CHECK,ZVE32X
 
-define <vscale x 8 x i8> @scalable() {
-; CHECK-LABEL: define <vscale x 8 x i8> @scalable(
+define <vscale x 1 x i64> @scalable() {
+; CHECK-LABEL: define <vscale x 1 x i64> @scalable(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmv.v.x.nxv2i32.i64(<vscale x 2 x i32> poison, i32 1431655765, i64 1)
-; CHECK-NEXT:    [[A:%.*]] = bitcast <vscale x 2 x i32> [[TMP1]] to <vscale x 8 x i8>
-; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
+; CHECK-NEXT:    [[B:%.*]] = bitcast <vscale x 2 x i32> [[TMP1]] to <vscale x 1 x i64>
+; CHECK-NEXT:    ret <vscale x 1 x i64> [[B]]
 ;
   %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 85, i64 4)
-  ret <vscale x 8 x i8> %a
+  %b = bitcast <vscale x 8 x i8> %a to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %b
 }
 
-define <vscale x 8 x i8> @small_scalar() {
-; CHECK-LABEL: define <vscale x 8 x i8> @small_scalar(
+define <vscale x 1 x i64> @small_scalar() {
+; CHECK-LABEL: define <vscale x 1 x i64> @small_scalar(
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmv.v.x.nxv2i32.i64(<vscale x 2 x i32> poison, i32 50529027, i64 1)
-; CHECK-NEXT:    [[A:%.*]] = bitcast <vscale x 2 x i32> [[TMP1]] to <vscale x 8 x i8>
-; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
+; CHECK-NEXT:    [[B:%.*]] = bitcast <vscale x 2 x i32> [[TMP1]] to <vscale x 1 x i64>
+; CHECK-NEXT:    ret <vscale x 1 x i64> [[B]]
 ;
   %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 3, i64 4)
-  ret <vscale x 8 x i8> %a
+  %b = bitcast <vscale x 8 x i8> %a to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %b
+}
+
+define <vscale x 1 x i64> @negative_scalar() {
+; CHECK-LABEL: define <vscale x 1 x i64> @negative_scalar(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmv.v.x.nxv2i32.i64(<vscale x 2 x i32> poison, i32 -50529028, i64 1)
+; CHECK-NEXT:    [[B:%.*]] = bitcast <vscale x 2 x i32> [[TMP1]] to <vscale x 1 x i64>
+; CHECK-NEXT:    ret <vscale x 1 x i64> [[B]]
+;
+  %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 -4, i64 4)
+  %b = bitcast <vscale x 8 x i8> %a to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %b
 }
 
-define <vscale x 64 x i1> @users_with_bitcast() {
-; CHECK-LABEL: define <vscale x 64 x i1> @users_with_bitcast(
+define <vscale x 64 x i1> @users() {
+; CHECK-LABEL: define <vscale x 64 x i1> @users(
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmv.v.x.nxv2i32.i64(<vscale x 2 x i32> poison, i32 1431655765, i64 1)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmv.v.x.nxv2i32.i64(<vscale x 2 x i32> poison, i32 -698984874, i64 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmv.v.x.nxv2i32.i64(<vscale x 2 x i32> poison, i32 -1431655766, i64 1)
 ; CHECK-NEXT:    [[RET1:%.*]] = xor <vscale x 2 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[RET:%.*]] = bitcast <vscale x 2 x i32> [[RET1]] to <vscale x 64 x i1>
 ; CHECK-NEXT:    ret <vscale x 64 x i1> [[RET]]
@@ -41,52 +56,107 @@ define <vscale x 64 x i1> @users_with_bitcast() {
   ret <vscale x 64 x i1> %ret
 }
 
-define <vscale x 8 x i8> @passthru_non_poison(<vscale x 8 x i8> %x) {
-; CHECK-LABEL: define <vscale x 8 x i8> @passthru_non_poison(
+define <vscale x 8 x i8> @no_bitcast() {
+; CHECK-LABEL: define <vscale x 8 x i8> @no_bitcast(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[A:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 85, i64 4)
+; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
+;
+  %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 85, i64 4)
+  ret <vscale x 8 x i8> %a
+}
+
+define <vscale x 1 x i64> @passthru_non_poison(<vscale x 8 x i8> %x) {
+; CHECK-LABEL: define <vscale x 1 x i64> @passthru_non_poison(
 ; CHECK-SAME: <vscale x 8 x i8> [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[A:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> [[X]], i8 85, i64 4)
-; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
+; CHECK-NEXT:    [[B:%.*]] = bitcast <vscale x 8 x i8> [[A]] to <vscale x 1 x i64>
+; CHECK-NEXT:    ret <vscale x 1 x i64> [[B]]
 ;
   %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> %x, i8 85, i64 4)
-  ret <vscale x 8 x i8> %a
+  %b = bitcast <vscale x 8 x i8> %a to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %b
 }
 
-define <vscale x 8 x i8> @scalar_non_constant(i8 %scalar) {
-; CHECK-LABEL: define <vscale x 8 x i8> @scalar_non_constant(
+define <vscale x 1 x i64> @scalar_non_constant(i8 %scalar) {
+; CHECK-LABEL: define <vscale x 1 x i64> @scalar_non_constant(
 ; CHECK-SAME: i8 [[SCALAR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[A:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 [[SCALAR]], i64 4)
-; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
+; CHECK-NEXT:    [[B:%.*]] = bitcast <vscale x 8 x i8> [[A]] to <vscale x 1 x i64>
+; CHECK-NEXT:    ret <vscale x 1 x i64> [[B]]
 ;
   %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 %scalar, i64 4)
-  ret <vscale x 8 x i8> %a
+  %b = bitcast <vscale x 8 x i8> %a to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %b
 }
 
-define <vscale x 8 x i8> @vl_non_constant(i64 %vl) {
-; CHECK-LABEL: define <vscale x 8 x i8> @vl_non_constant(
+define <vscale x 1 x i64> @vl_non_constant(i64 %vl) {
+; CHECK-LABEL: define <vscale x 1 x i64> @vl_non_constant(
 ; CHECK-SAME: i64 [[VL:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[A:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 85, i64 [[VL]])
-; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
+; CHECK-NEXT:    [[B:%.*]] = bitcast <vscale x 8 x i8> [[A]] to <vscale x 1 x i64>
+; CHECK-NEXT:    ret <vscale x 1 x i64> [[B]]
 ;
   %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 85, i64 %vl)
-  ret <vscale x 8 x i8> %a
+  %b = bitcast <vscale x 8 x i8> %a to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %b
 }
 
-define <vscale x 1 x i128> @scalar_operand_too_large() {
-; CHECK-LABEL: define <vscale x 1 x i128> @scalar_operand_too_large(
+define <vscale x 2 x i64> @vector_elt_type_too_large() {
+; CHECK-LABEL: define <vscale x 2 x i64> @vector_elt_type_too_large(
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[A:%.*]] = call <vscale x 1 x i128> @llvm.riscv.vmv.v.x.nxv1i128.i64(<vscale x 1 x i128> poison, i128 85, i64 4)
-; CHECK-NEXT:    ret <vscale x 1 x i128> [[A]]
+; CHECK-NEXT:    [[B:%.*]] = bitcast <vscale x 1 x i128> [[A]] to <vscale x 2 x i64>
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[B]]
 ;
   %a = call <vscale x 1 x i128> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 1 x i128> poison, i128 85, i64 4)
-  ret <vscale x 1 x i128> %a
+  %b = bitcast <vscale x 1 x i128> %a to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %b
 }
 
-define <vscale x 8 x i8> @vl_too_large() {
-; CHECK-LABEL: define <vscale x 8 x i8> @vl_too_large(
+define <vscale x 1 x i64> @vl_too_large() {
+; CHECK-LABEL: define <vscale x 1 x i64> @vl_too_large(
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[A:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 85, i64 128)
-; CHECK-NEXT:    ret <vscale x 8 x i8> [[A]]
+; CHECK-NEXT:    [[B:%.*]] = bitcast <vscale x 8 x i8> [[A]] to <vscale x 1 x i64>
+; CHECK-NEXT:    ret <vscale x 1 x i64> [[B]]
 ;
   %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 85, i64 128)
-  ret <vscale x 8 x i8> %a
+  %b = bitcast <vscale x 8 x i8> %a to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %b
+}
+
+define <vscale x 1 x i32> @vl_not_divisible() {
+; CHECK-LABEL: define <vscale x 1 x i32> @vl_not_divisible(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[A:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmv.v.x.nxv4i8.i64(<vscale x 4 x i8> poison, i8 85, i64 8)
+; CHECK-NEXT:    [[B:%.*]] = bitcast <vscale x 4 x i8> [[A]] to <vscale x 1 x i32>
+; CHECK-NEXT:    ret <vscale x 1 x i32> [[B]]
+;
+  %a = call <vscale x 4 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 4 x i8> poison, i8 85, i64 8)
+  %b = bitcast <vscale x 4 x i8> %a to <vscale x 1 x i32>
+  ret <vscale x 1 x i32> %b
+}
+
+define <vscale x 1 x i64> @vector_elt_type_legality() {
+; RV32-LABEL: define <vscale x 1 x i64> @vector_elt_type_legality(
+; RV32-SAME: ) #[[ATTR0]] {
+; RV32-NEXT:    [[A:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 85, i64 8)
+; RV32-NEXT:    [[B:%.*]] = bitcast <vscale x 8 x i8> [[A]] to <vscale x 1 x i64>
+; RV32-NEXT:    ret <vscale x 1 x i64> [[B]]
+;
+; RV64-LABEL: define <vscale x 1 x i64> @vector_elt_type_legality(
+; RV64-SAME: ) #[[ATTR0]] {
+; RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmv.v.x.nxv1i64.i64(<vscale x 1 x i64> poison, i64 6148914691236517205, i64 1)
+; RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
+;
+; ZVE32X-LABEL: define <vscale x 1 x i64> @vector_elt_type_legality(
+; ZVE32X-SAME: ) #[[ATTR0]] {
+; ZVE32X-NEXT:    [[A:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8.i64(<vscale x 8 x i8> poison, i8 85, i64 8)
+; ZVE32X-NEXT:    [[B:%.*]] = bitcast <vscale x 8 x i8> [[A]] to <vscale x 1 x i64>
+; ZVE32X-NEXT:    ret <vscale x 1 x i64> [[B]]
+;
+  %a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.x.nxv8i8(<vscale x 8 x i8> poison, i8 85, i64 8)
+  %b = bitcast <vscale x 8 x i8> %a to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %b
 }

>From 5433ea8ed656297c4a262c9e7180afd280fdefa4 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Sat, 21 Feb 2026 11:29:40 +0000
Subject: [PATCH 5/5] [InstCombine/RISCV] Also test zve32x on RV64

---
 llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll b/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
index 3af574277d745..48f73733012b3 100644
--- a/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
+++ b/llvm/test/Transforms/InstCombine/RISCV/riscv-vmv-v-x.ll
@@ -2,6 +2,7 @@
 ; RUN: opt -p instcombine -mtriple=riscv32 -mattr=+v -S %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: opt -p instcombine -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefixes=CHECK,RV64
 ; RUN: opt -p instcombine -mtriple=riscv32 -mattr=+zve32x -S %s | FileCheck %s --check-prefixes=CHECK,ZVE32X
+; RUN: opt -p instcombine -mtriple=riscv64 -mattr=+zve32x -S %s | FileCheck %s --check-prefixes=CHECK,ZVE32X
 
 define <vscale x 1 x i64> @scalable() {
 ; CHECK-LABEL: define <vscale x 1 x i64> @scalable(



More information about the llvm-commits mailing list