[llvm] [RISCV][GISEL] Legalize G_EXTRACT_SUBVECTOR (PR #109426)
    Michael Maitland via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Mon Sep 30 11:21:33 PDT 2024
    
    
  
https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/109426
>From 4771bee80792ae9b5e5d97da598d71a48237ecf8 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 16 Sep 2024 11:07:20 -0700
Subject: [PATCH 01/12] [RISCV][GISEL] Legalize G_EXTRACT_SUBVECTOR
---
 .../CodeGen/GlobalISel/GenericMachineInstrs.h |  11 +
 .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 133 +++++++
 .../Target/RISCV/GISel/RISCVLegalizerInfo.h   |   1 +
 llvm/lib/Target/RISCV/RISCVInstrGISel.td      |  10 +
 .../rvv/legalize-extract-subvector.mir        | 339 ++++++++++++++++++
 5 files changed, 494 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 132b7ec9aeef7c..d9f3f4ab3935d3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -800,6 +800,17 @@ class GInsertVectorElement : public GenericMachineInstr {
   }
 };
 
+/// Represents a G_EXTRACT_SUBVECTOR (op 1: source vector, op 2: index imm).
+class GExtractSubvector : public GenericMachineInstr {
+public:
+  Register getSrcVec() const { return getOperand(1).getReg(); }
+  uint64_t getIndexImm() const { return getOperand(2).getImm(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_EXTRACT_SUBVECTOR;
+  }
+};
+
 /// Represents a freeze.
 class GFreeze : public GenericMachineInstr {
 public:
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 055193bcc2c8db..db53bc409392bd 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -597,6 +597,10 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
 
   SplatActions.clampScalar(1, sXLen, sXLen);
 
+  getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
+      .customIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
+      .customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
+
   getLegacyLegalizerInfo().computeTables();
 }
 
@@ -931,6 +935,133 @@ bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
   return true;
 }
 
+/// Return the scalable LLT holding RVVBitsPerBlock bits of VecTy's elements.
+static LLT getLMUL1Ty(LLT VecTy) {
+  const LLT EltTy = VecTy.getElementType();
+  const unsigned EltBits = EltTy.getSizeInBits();
+  assert(EltBits <= 64 && "Unexpected vector LLT");
+  return LLT::scalable_vector(RISCV::RVVBitsPerBlock / EltBits, EltTy);
+}
+
+bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
+                                                  MachineIRBuilder &MIB) const {
+  GExtractSubvector &ES = cast<GExtractSubvector>(MI);
+
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+
+  Register Dst = ES.getReg(0);
+  Register Src = ES.getSrcVec();
+  uint64_t Idx = ES.getIndexImm();
+
+  // Only support vectors using custom legalization. We know the DstTy is a
+  // vector since we used that to decide whether to custom legalize or not.
+  LLT BigTy = MRI.getType(Src);
+  if (BigTy.isScalar())
+    return false;
+
+  LLT LitTy = MRI.getType(Dst);
+  Register Vec = Src;
+
+  // We don't have the ability to slide mask vectors down indexed by their i1
+  // elements; the smallest we can do is i8. Often we are able to bitcast to
+  // equivalent i8 vectors.
+  if (LitTy.getElementType() == LLT::scalar(1) && Idx != 0) {
+    auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
+    auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
+    if (BigTyMinElts >= 8 && LitTyMinElts >= 8) {
+      assert(Idx % 8 == 0 && "Invalid index");
+      assert(BigTyMinElts % 8 == 0 && LitTyMinElts % 8 == 0 &&
+             "Unexpected mask vector lowering");
+      Idx /= 8;
+      BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
+      LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
+      Vec = MIB.buildBitcast(BigTy, Vec).getReg(0);
+    } else {
+      // We can't slide this mask vector down indexed by its i1 elements.
+      // This poses a problem when we wish to extract a scalable vector which
+      // can't be re-expressed as a larger type. Just choose the slow path:
+      // extend to i8 elements, extract, then compare against zero for i1.
+      LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
+      LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
+      auto BigZExt = MIB.buildZExt(ExtBigTy, Vec);
+      auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
+      auto SplatZero = MIB.buildSplatVector(
+          ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
+      MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
+      MI.eraseFromParent();
+      return true;
+    }
+  }
+
+  // With an index of 0 this is a cast-like subvector, which can be performed
+  // with subregister operations, so the instruction is left as is.
+  if (Idx == 0)
+    return true;
+
+  // extract_subvector scales the index by vscale if the subvector is scalable,
+  // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
+  const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
+  MVT LitTyMVT = getMVTForLLT(LitTy);
+  unsigned SubRegIdx;
+  ElementCount RemIdx;
+  auto Decompose =
+      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+          getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
+  SubRegIdx = Decompose.first;
+  RemIdx = ElementCount::getScalable(Decompose.second);
+
+  // If the Idx has been completely eliminated then this is a subvector extract
+  // which naturally aligns to a vector register. These can easily be handled
+  // using subregister manipulation.
+  // TODO: add tests
+  if (RemIdx.isZero())
+    return true;
+
+  // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
+  // was > M1 then the index would need to be a multiple of VLMAX, and so would
+  // divide exactly.
+  assert(
+      RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(LitTyMVT)).second ||
+      RISCVTargetLowering::getLMUL(LitTyMVT) == RISCVII::VLMUL::LMUL_1);
+
+  // If the vector type is an LMUL-group type, extract a subvector equal to the
+  // nearest full vector register type.
+  LLT InterLitTy = BigTy;
+  if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
+                          getLMUL1Ty(BigTy).getSizeInBits())) {
+    // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
+    // we should have successfully decomposed the extract into a subregister.
+    assert(SubRegIdx != RISCV::NoSubRegister);
+    InterLitTy = getLMUL1Ty(BigTy);
+    // SDAG builds a TargetExtractSubreg. A Copy with SubReg specified on the
+    // source Register is the  equivalent.
+    Vec = MIB.buildInstr(TargetOpcode::COPY, {InterLitTy}, {})
+              .addReg(Vec, 0, SubRegIdx)
+              .getReg(0);
+  }
+
+  // Slide the register down so the desired subvector starts at element 0.
+  // NOTE(review): Mask/VL are built for LitTy, not InterLitTy -- confirm.
+  const LLT XLenTy(STI.getXLenVT());
+  auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx.getKnownMinValue());
+  auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI);
+  uint64_t Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
+  auto Slidedown = MIB.buildInstr(
+      RISCV::G_VSLIDEDOWN_VL, {InterLitTy},
+      {MIB.buildUndef(InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});
+
+  // Now the vector is in the right position, extract our final subvector. This
+  // should resolve to a COPY.
+  auto Extract = MIB.buildExtractSubvector(LitTy, Slidedown, 0);
+
+  // We might have bitcast from a mask type: cast back to Dst's type.
+  // NOTE(review): otherwise this is a same-type G_BITCAST; maybe buildCast?
+  MIB.buildBitcast(Dst, Extract);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool RISCVLegalizerInfo::legalizeCustom(
     LegalizerHelper &Helper, MachineInstr &MI,
     LostDebugLocObserver &LocObserver) const {
@@ -1001,6 +1132,8 @@ bool RISCVLegalizerInfo::legalizeCustom(
     return legalizeExt(MI, MIRBuilder);
   case TargetOpcode::G_SPLAT_VECTOR:
     return legalizeSplatVector(MI, MIRBuilder);
+  case TargetOpcode::G_EXTRACT_SUBVECTOR:
+    return legalizeExtractSubvector(MI, MIRBuilder);
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_STORE:
     return legalizeLoadStore(MI, Helper, MIRBuilder);
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
index 2fc28615e7630d..d2afb175ae42bb 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
@@ -46,6 +46,7 @@ class RISCVLegalizerInfo : public LegalizerInfo {
   bool legalizeVScale(MachineInstr &MI, MachineIRBuilder &MIB) const;
   bool legalizeExt(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const;
   bool legalizeSplatVector(MachineInstr &MI, MachineIRBuilder &MIB) const;
+  bool legalizeExtractSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const;
   bool legalizeLoadStore(MachineInstr &MI, LegalizerHelper &Helper,
                          MachineIRBuilder &MIB) const;
 };
diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
index ba40662c49c1df..b8641418aff747 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
@@ -57,3 +57,13 @@ def G_SPLAT_VECTOR_SPLIT_I64_VL : RISCVGenericInstruction {
   let InOperandList = (ins type0:$passthru, type1:$hi, type1:$lo, type2:$vl);
   let hasSideEffects = false;
 }
+
+// Pseudo equivalent to a RISCVISD::VSLIDEDOWN_VL
+def G_VSLIDEDOWN_VL : RISCVGenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$merge, type0:$vec, type1:$idx, type2:$mask,
+                       type3:$vl, type4:$policy);
+  let hasSideEffects = false;
+}
+def : GINodeEquiv<G_VSLIDEDOWN_VL, riscv_slidedown_vl>;
+
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
new file mode 100644
index 00000000000000..78a2f82632d96a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
@@ -0,0 +1,339 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,RV32
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,RV64
+
+# Special handling for i1-element vectors with non-zero index
+---
+name:            extract_subvector_nxv4i1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; RV32-LABEL: name: extract_subvector_nxv4i1
+    ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+    ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+    ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+    ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C2]](s32)
+    ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+    ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[SELECT]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 2 x s1>), $x0, 3
+    ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 4 x s8>), 0
+    ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>)
+    ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
+    ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
+    ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+    ; RV32-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64-LABEL: name: extract_subvector_nxv4i1
+    ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+    ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+    ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+    ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+    ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+    ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; RV64-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C2]](s64)
+    ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+    ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[SELECT]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 2 x s1>), $x0, 3
+    ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 4 x s8>), 0
+    ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>)
+    ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
+    ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+    ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
+    ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+    ; RV64-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 2 x s1>) = G_EXTRACT_SUBVECTOR %0(<vscale x 4 x s1>), 2
+    $v8 = COPY %1(<vscale x 2 x s1>)
+    PseudoRET implicit $v8
+...
+---
+name:            extract_subvector_nxv8i1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; RV32-LABEL: name: extract_subvector_nxv8i1
+    ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+    ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+    ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+    ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C2]](s32)
+    ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+    ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[SELECT]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 2 x s1>), $x0, 3
+    ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 8 x s8>), 0
+    ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>)
+    ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
+    ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
+    ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+    ; RV32-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64-LABEL: name: extract_subvector_nxv8i1
+    ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+    ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+    ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+    ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+    ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+    ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; RV64-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C2]](s64)
+    ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+    ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[SELECT]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 2 x s1>), $x0, 3
+    ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 8 x s8>), 0
+    ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>)
+    ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
+    ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+    ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
+    ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+    ; RV64-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 2 x s1>) = G_EXTRACT_SUBVECTOR %0(<vscale x 8 x s1>), 2
+    $v8 = COPY %1(<vscale x 2 x s1>)
+    PseudoRET implicit $v8
+...
+---
+name:            extract_subvector_nxv64i1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; RV32-LABEL: name: extract_subvector_nxv64i1
+    ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+    ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+    ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[DEF]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+    ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_EXTRACT_SUBVECTOR [[SELECT]](<vscale x 64 x s8>), 16
+    ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+    ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[EXTRACT_SUBVECTOR]](<vscale x 4 x s8>), [[SPLAT_VECTOR2]]
+    ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+    ; RV32-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64-LABEL: name: extract_subvector_nxv64i1
+    ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+    ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+    ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+    ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+    ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[DEF]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+    ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_EXTRACT_SUBVECTOR [[SELECT]](<vscale x 64 x s8>), 16
+    ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+    ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+    ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[EXTRACT_SUBVECTOR]](<vscale x 4 x s8>), [[SPLAT_VECTOR2]]
+    ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+    ; RV64-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 4 x s1>) = G_EXTRACT_SUBVECTOR %0(<vscale x 64 x s1>), 16
+    $v8 = COPY %1(<vscale x 4 x s1>)
+    PseudoRET implicit $v8
+...
+
+# i1-element vectors with zero index
+---
+name:            extract_subvector_nxv4i1_zero
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: extract_subvector_nxv4i1_zero
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s1>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 4 x s1>), 0
+    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 1 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 1 x s1>) = G_EXTRACT_SUBVECTOR %0(<vscale x 4 x s1>), 0
+    $v8 = COPY %1(<vscale x 1 x s1>)
+    PseudoRET implicit $v8
+...
+---
+name:            extract_subvector_nxv8i1_zero
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: extract_subvector_nxv8i1_zero
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s1>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s1>), 0
+    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 4 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 4 x s1>) = G_EXTRACT_SUBVECTOR %0(<vscale x 8 x s1>), 0
+    $v8 = COPY %1(<vscale x 4 x s1>)
+    PseudoRET implicit $v8
+...
+---
+name:            extract_subvector_nxv64i1_zero
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: extract_subvector_nxv64i1_zero
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 32 x s1>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 64 x s1>), 0
+    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 32 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 32 x s1>) = G_EXTRACT_SUBVECTOR %0(<vscale x 64 x s1>), 0
+    $v8 = COPY %1(<vscale x 32 x s1>)
+    PseudoRET implicit $v8
+...
+
+# Extract with zero index
+---
+name:            extract_subvector_nxv2i8_zero
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: extract_subvector_nxv2i8_zero
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 2 x s8>), 0
+    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 1 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 1 x s8>) = G_EXTRACT_SUBVECTOR %0(<vscale x 2 x s8>), 0
+    $v8 = COPY %1(<vscale x 1 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            extract_subvector_nxv4i16_zero
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: extract_subvector_nxv4i16_zero
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 4 x s16>), 0
+    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 2 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 2 x s16>) = G_EXTRACT_SUBVECTOR %0(<vscale x 4 x s16>), 0
+    $v8 = COPY %1(<vscale x 2 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            extract_subvector_nxv8i32_zero
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: extract_subvector_nxv8i32_zero
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s32>), 0
+    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 4 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 4 x s32>) = G_EXTRACT_SUBVECTOR %0(<vscale x 8 x s32>), 0
+    $v8 = COPY %1(<vscale x 4 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            extract_subvector_nxv8i64_zero
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: extract_subvector_nxv8i64_zero
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s64>), 0
+    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 4 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 4 x s64>) = G_EXTRACT_SUBVECTOR %0(<vscale x 8 x s64>), 0
+    $v8 = COPY %1(<vscale x 4 x s64>)
+    PseudoRET implicit $v8
+...
+
+# Extract with non-zero index
+---
+name:            extract_subvector_nxv2i8
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: extract_subvector_nxv2i8
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 2 x s8>), 0
+    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 1 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 1 x s8>) = G_EXTRACT_SUBVECTOR %0(<vscale x 2 x s8>), 0
+    $v8 = COPY %1(<vscale x 1 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            extract_subvector_nxv4i16
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: extract_subvector_nxv4i16
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 4 x s16>), 0
+    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 1 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 1 x s16>) = G_EXTRACT_SUBVECTOR %0(<vscale x 4 x s16>), 0
+    $v8 = COPY %1(<vscale x 1 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            extract_subvector_nxv8i32
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: extract_subvector_nxv8i32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s32>), 0
+    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 4 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 4 x s32>) = G_EXTRACT_SUBVECTOR %0(<vscale x 8 x s32>), 0
+    $v8 = COPY %1(<vscale x 4 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            extract_subvector_nxv8i64
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: extract_subvector_nxv8i64
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s64>), 0
+    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 2 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 2 x s64>) = G_EXTRACT_SUBVECTOR %0(<vscale x 8 x s64>), 0
+    $v8 = COPY %1(<vscale x 2 x s64>)
+    PseudoRET implicit $v8
+...
+
>From e7086d69a7d2eaecceb953f07c11a5a243f29048 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Fri, 20 Sep 2024 11:50:42 -0700
Subject: [PATCH 02/12] fixup! respond to review comments
---
 .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp |  2 +-
 llvm/lib/Target/RISCV/RISCVInstrGISel.td      |  2 +-
 .../rvv/legalize-extract-subvector.mir        | 72 ++++++++++++++-----
 3 files changed, 58 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index db53bc409392bd..fa4ef5a02a8382 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -1034,7 +1034,7 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
     assert(SubRegIdx != RISCV::NoSubRegister);
     InterLitTy = getLMUL1Ty(BigTy);
     // SDAG builds a TargetExtractSubreg. A Copy with SubReg specified on the
-    // source Register is the  equivalent.
+    // source Register is the equivalent.
     Vec = MIB.buildInstr(TargetOpcode::COPY, {InterLitTy}, {})
               .addReg(Vec, 0, SubRegIdx)
               .getReg(0);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
index b8641418aff747..f6bf74c565ab38 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
@@ -62,7 +62,7 @@ def G_SPLAT_VECTOR_SPLIT_I64_VL : RISCVGenericInstruction {
 def G_VSLIDEDOWN_VL : RISCVGenericInstruction {
   let OutOperandList = (outs type0:$dst);
   let InOperandList = (ins type0:$merge, type0:$vec, type1:$idx, type2:$mask,
-                       type3:$vl, type4:$policy);
+                       type1:$vl, type1:$policy);
   let hasSideEffects = false;
 }
 def : GINodeEquiv<G_VSLIDEDOWN_VL, riscv_slidedown_vl>;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
index 78a2f82632d96a..6d2b8684cba3d4 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
@@ -278,13 +278,33 @@ legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; CHECK-LABEL: name: extract_subvector_nxv2i8
-    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 2 x s8>), 0
-    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 1 x s8>)
-    ; CHECK-NEXT: PseudoRET implicit $v8
+    ; RV32-LABEL: name: extract_subvector_nxv2i8
+    ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+    ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+    ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 2 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[DEF]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 1 x s1>), $x0, 3
+    ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 2 x s8>), 0
+    ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 1 x s8>)
+    ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 1 x s8>)
+    ; RV32-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64-LABEL: name: extract_subvector_nxv2i8
+    ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+    ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+    ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 2 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[DEF]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 1 x s1>), $x0, 3
+    ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 2 x s8>), 0
+    ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 1 x s8>)
+    ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 1 x s8>)
+    ; RV64-NEXT: PseudoRET implicit $v8
     %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
-    %1:_(<vscale x 1 x s8>) = G_EXTRACT_SUBVECTOR %0(<vscale x 2 x s8>), 0
+    %1:_(<vscale x 1 x s8>) = G_EXTRACT_SUBVECTOR %0(<vscale x 2 x s8>), 1
     $v8 = COPY %1(<vscale x 1 x s8>)
     PseudoRET implicit $v8
 ...
@@ -294,13 +314,33 @@ legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; CHECK-LABEL: name: extract_subvector_nxv4i16
-    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 4 x s16>), 0
-    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 1 x s16>)
-    ; CHECK-NEXT: PseudoRET implicit $v8
+    ; RV32-LABEL: name: extract_subvector_nxv4i16
+    ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+    ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+    ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 4 x s16>) = G_VSLIDEDOWN_VL [[DEF1]], [[DEF]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 1 x s1>), $x0, 3
+    ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 4 x s16>), 0
+    ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s16>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 1 x s16>)
+    ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 1 x s16>)
+    ; RV32-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64-LABEL: name: extract_subvector_nxv4i16
+    ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+    ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+    ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 4 x s16>) = G_VSLIDEDOWN_VL [[DEF1]], [[DEF]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 1 x s1>), $x0, 3
+    ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 4 x s16>), 0
+    ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s16>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 1 x s16>)
+    ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 1 x s16>)
+    ; RV64-NEXT: PseudoRET implicit $v8
     %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
-    %1:_(<vscale x 1 x s16>) = G_EXTRACT_SUBVECTOR %0(<vscale x 4 x s16>), 0
+    %1:_(<vscale x 1 x s16>) = G_EXTRACT_SUBVECTOR %0(<vscale x 4 x s16>), 2
     $v8 = COPY %1(<vscale x 1 x s16>)
     PseudoRET implicit $v8
 ...
@@ -312,11 +352,11 @@ body:             |
   bb.0.entry:
     ; CHECK-LABEL: name: extract_subvector_nxv8i32
     ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s32>), 0
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s32>), 4
     ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 4 x s32>)
     ; CHECK-NEXT: PseudoRET implicit $v8
     %0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
-    %1:_(<vscale x 4 x s32>) = G_EXTRACT_SUBVECTOR %0(<vscale x 8 x s32>), 0
+    %1:_(<vscale x 4 x s32>) = G_EXTRACT_SUBVECTOR %0(<vscale x 8 x s32>), 4
     $v8 = COPY %1(<vscale x 4 x s32>)
     PseudoRET implicit $v8
 ...
@@ -328,11 +368,11 @@ body:             |
   bb.0.entry:
     ; CHECK-LABEL: name: extract_subvector_nxv8i64
     ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s64>), 0
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s64>), 2
     ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 2 x s64>)
     ; CHECK-NEXT: PseudoRET implicit $v8
     %0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
-    %1:_(<vscale x 2 x s64>) = G_EXTRACT_SUBVECTOR %0(<vscale x 8 x s64>), 0
+    %1:_(<vscale x 2 x s64>) = G_EXTRACT_SUBVECTOR %0(<vscale x 8 x s64>), 2
     $v8 = COPY %1(<vscale x 2 x s64>)
     PseudoRET implicit $v8
 ...
>From 6a0740921bb4c866fbe4de63b02be4a5396d8ae5 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Fri, 20 Sep 2024 13:11:08 -0700
Subject: [PATCH 03/12] fixup! drop check for malformed MIR
---
 llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index fa4ef5a02a8382..0434b68050d80b 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -953,13 +953,8 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
   Register Src = ES.getSrcVec();
   uint64_t Idx = ES.getIndexImm();
 
-  // Only support vectors using custom legalization. We know the DstTy is a
-  // vector since we used that to decide whether to custom legalize or not.
-  LLT BigTy = MRI.getType(Src);
-  if (BigTy.isScalar())
-    return false;
-
   LLT LitTy = MRI.getType(Dst);
+  LLT BigTy = MRI.getType(Src);
   Register Vec = Src;
 
   // We don't have the ability to slide mask vectors down indexed by their i1
>From 41e6c7629eed9e510e26f26427406490841b49bb Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 23 Sep 2024 12:48:15 -0700
Subject: [PATCH 04/12] fixup! respond to review
---
 .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp |  34 ++--
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |   1 +
 .../rvv/legalize-extract-subvector.mir        | 168 ++++++++++++------
 3 files changed, 138 insertions(+), 65 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 0434b68050d80b..d5e8e2d298980a 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -971,6 +971,13 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
       BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
       LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
       Vec = MIB.buildBitcast(BigTy, Vec).getReg(0);
+      auto E = MIB.buildExtractSubvector(LitTy, Vec, Idx);
+      if (LitTy != MRI.getType(Dst))
+        MIB.buildBitcast(Dst, E);
+      else
+        E->getOperand(0).setReg(Dst);
+      MI.eraseFromParent();
+      return true;
     } else {
       // We can't slide this mask vector up indexed by its i1 elements.
       // This poses a problem when we wish to insert a scalable vector which
@@ -997,19 +1004,15 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
   // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
   const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
   MVT LitTyMVT = getMVTForLLT(LitTy);
-  unsigned SubRegIdx;
-  ElementCount RemIdx;
   auto Decompose =
       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
           getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
-  SubRegIdx = Decompose.first;
-  RemIdx = ElementCount::getScalable(Decompose.second);
+  unsigned RemIdx = Decompose.second;
 
   // If the Idx has been completely eliminated then this is a subvector extract
   // which naturally aligns to a vector register. These can easily be handled
   // using subregister manipulation.
-  // TODO: add tests
-  if (RemIdx.isZero())
+  if (RemIdx == 0)
     return true;
 
   // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
@@ -1026,19 +1029,18 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
                           getLMUL1Ty(BigTy).getSizeInBits())) {
     // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
     // we should have successfully decomposed the extract into a subregister.
-    assert(SubRegIdx != RISCV::NoSubRegister);
+    assert(Decompose.first != RISCV::NoSubRegister);
     InterLitTy = getLMUL1Ty(BigTy);
-    // SDAG builds a TargetExtractSubreg. A Copy with SubReg specified on the
-    // source Register is the equivalent.
-    Vec = MIB.buildInstr(TargetOpcode::COPY, {InterLitTy}, {})
-              .addReg(Vec, 0, SubRegIdx)
-              .getReg(0);
+    // SDAG builds a TargetExtractSubreg. We cannot create a Copy with SubReg
+    // specified on the source Register (the equivalent) since generic virtual
+    // registers do not allow subregister indices.
+    Vec = MIB.buildExtractSubvector(InterLitTy, Vec, Idx - RemIdx).getReg(0);
   }
 
   // Slide this vector register down by the desired number of elements in order
   // to place the desired subvector starting at element 0.
   const LLT XLenTy(STI.getXLenVT());
-  auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx.getKnownMinValue());
+  auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx);
   auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI);
   uint64_t Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
   auto Slidedown = MIB.buildInstr(
@@ -1051,7 +1053,11 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
 
   // We might have bitcast from a mask type: cast back to the original type if
   // required.
-  MIB.buildBitcast(Dst, Extract);
+  if (TypeSize::isKnownLT(LitTy.getSizeInBits(),
+                          MRI.getType(Dst).getSizeInBits()))
+    MIB.buildBitcast(Dst, Extract);
+  else
+    Extract->getOperand(0).setReg(Dst);
 
   MI.eraseFromParent();
   return true;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c4458b14f36ece..04894492514bdd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10525,6 +10525,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
     assert(SubRegIdx != RISCV::NoSubRegister);
     InterSubVT = getLMUL1VT(VecVT);
     Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
+    assert(false);
   }
 
   // Slide this vector register down by the desired number of elements in order
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
index 6d2b8684cba3d4..73ea76a77bf3df 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
@@ -1,15 +1,15 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,RV32
-# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,RV64
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,RV32
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,RV64
 
 # Special handling for i1-element vectors with non-zero index
 ---
-name:            extract_subvector_nxv4i1
+name:            extract_subvector_nxv2i1_nxv4i1
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; RV32-LABEL: name: extract_subvector_nxv4i1
+    ; RV32-LABEL: name: extract_subvector_nxv2i1_nxv4i1
     ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
     ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32)
@@ -23,14 +23,13 @@ body:             |
     ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
     ; RV32-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[SELECT]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 2 x s1>), $x0, 3
     ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 4 x s8>), 0
-    ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>)
     ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
-    ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
+    ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
     ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
     ; RV32-NEXT: PseudoRET implicit $v8
     ;
-    ; RV64-LABEL: name: extract_subvector_nxv4i1
+    ; RV64-LABEL: name: extract_subvector_nxv2i1_nxv4i1
     ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
     ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
@@ -46,11 +45,10 @@ body:             |
     ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
     ; RV64-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[SELECT]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 2 x s1>), $x0, 3
     ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 4 x s8>), 0
-    ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>)
     ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
     ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
-    ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
+    ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
     ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
     ; RV64-NEXT: PseudoRET implicit $v8
     %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
@@ -59,12 +57,12 @@ body:             |
     PseudoRET implicit $v8
 ...
 ---
-name:            extract_subvector_nxv8i1
+name:            extract_subvector_nxv2i1_nxv8i1
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; RV32-LABEL: name: extract_subvector_nxv8i1
+    ; RV32-LABEL: name: extract_subvector_nxv2i1_nxv8i1
     ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
     ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32)
@@ -78,14 +76,13 @@ body:             |
     ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
     ; RV32-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[SELECT]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 2 x s1>), $x0, 3
     ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 8 x s8>), 0
-    ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>)
     ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
-    ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
+    ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
     ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
     ; RV32-NEXT: PseudoRET implicit $v8
     ;
-    ; RV64-LABEL: name: extract_subvector_nxv8i1
+    ; RV64-LABEL: name: extract_subvector_nxv2i1_nxv8i1
     ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
     ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
@@ -101,11 +98,10 @@ body:             |
     ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
     ; RV64-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[SELECT]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 2 x s1>), $x0, 3
     ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 8 x s8>), 0
-    ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>)
     ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
     ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
-    ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
+    ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
     ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
     ; RV64-NEXT: PseudoRET implicit $v8
     %0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
@@ -114,12 +110,12 @@ body:             |
     PseudoRET implicit $v8
 ...
 ---
-name:            extract_subvector_nxv64i1
+name:            extract_subvector_nxv4i1_nxv64i1
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; RV32-LABEL: name: extract_subvector_nxv64i1
+    ; RV32-LABEL: name: extract_subvector_nxv4i1_nxv64i1
     ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
     ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C]](s32)
@@ -133,7 +129,7 @@ body:             |
     ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
     ; RV32-NEXT: PseudoRET implicit $v8
     ;
-    ; RV64-LABEL: name: extract_subvector_nxv64i1
+    ; RV64-LABEL: name: extract_subvector_nxv4i1_nxv64i1
     ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
     ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
@@ -154,15 +150,53 @@ body:             |
     $v8 = COPY %1(<vscale x 4 x s1>)
     PseudoRET implicit $v8
 ...
+---
+name:            extract_subvector_nxv32i1_nxv64i1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; RV32-LABEL: name: extract_subvector_nxv32i1_nxv64i1
+    ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s8>) = G_BITCAST [[DEF]](<vscale x 64 x s1>)
+    ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+    ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+    ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[BITCAST]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 4 x s1>), $x0, 3
+    ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 8 x s8>), 0
+    ; RV32-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 32 x s1>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 4 x s8>)
+    ; RV32-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 32 x s1>)
+    ; RV32-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64-LABEL: name: extract_subvector_nxv32i1_nxv64i1
+    ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s8>) = G_BITCAST [[DEF]](<vscale x 64 x s1>)
+    ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+    ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+    ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[BITCAST]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 4 x s1>), $x0, 3
+    ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 8 x s8>), 0
+    ; RV64-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 32 x s1>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 4 x s8>)
+    ; RV64-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 32 x s1>)
+    ; RV64-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 32 x s1>) = G_EXTRACT_SUBVECTOR %0(<vscale x 64 x s1>), 32
+    $v8 = COPY %1(<vscale x 32 x s1>)
+    PseudoRET implicit $v8
+...
 
 # i1-element vectors with zero index
 ---
-name:            extract_subvector_nxv4i1_zero
+name:            extract_subvector_nxv1i1_nxv4i1_zero
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; CHECK-LABEL: name: extract_subvector_nxv4i1_zero
+    ; CHECK-LABEL: name: extract_subvector_nxv1i1_nxv4i1_zero
     ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s1>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 4 x s1>), 0
     ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 1 x s1>)
@@ -173,12 +207,12 @@ body:             |
     PseudoRET implicit $v8
 ...
 ---
-name:            extract_subvector_nxv8i1_zero
+name:            extract_subvector_nxv4i1_nxv8i1_zero
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; CHECK-LABEL: name: extract_subvector_nxv8i1_zero
+    ; CHECK-LABEL: name: extract_subvector_nxv4i1_nxv8i1_zero
     ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s1>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s1>), 0
     ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 4 x s1>)
@@ -189,12 +223,12 @@ body:             |
     PseudoRET implicit $v8
 ...
 ---
-name:            extract_subvector_nxv64i1_zero
+name:            extract_subvector_nxv32i1_nxv64i1_zero
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; CHECK-LABEL: name: extract_subvector_nxv64i1_zero
+    ; CHECK-LABEL: name: extract_subvector_nxv32i1_nxv64i1_zero
     ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 32 x s1>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 64 x s1>), 0
     ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 32 x s1>)
@@ -207,12 +241,12 @@ body:             |
 
 # Extract with zero index
 ---
-name:            extract_subvector_nxv2i8_zero
+name:            extract_subvector_nxv1i8_nxv2i8_zero
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; CHECK-LABEL: name: extract_subvector_nxv2i8_zero
+    ; CHECK-LABEL: name: extract_subvector_nxv1i8_nxv2i8_zero
     ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 2 x s8>), 0
     ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 1 x s8>)
@@ -223,12 +257,12 @@ body:             |
     PseudoRET implicit $v8
 ...
 ---
-name:            extract_subvector_nxv4i16_zero
+name:            extract_subvector_nxv2i16_nxv4i16_zero
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; CHECK-LABEL: name: extract_subvector_nxv4i16_zero
+    ; CHECK-LABEL: name: extract_subvector_nxv2i16_nxv4i16_zero
     ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 4 x s16>), 0
     ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 2 x s16>)
@@ -239,12 +273,12 @@ body:             |
     PseudoRET implicit $v8
 ...
 ---
-name:            extract_subvector_nxv8i32_zero
+name:            extract_subvector_nxv4i32_nxv8i32_zero
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; CHECK-LABEL: name: extract_subvector_nxv8i32_zero
+    ; CHECK-LABEL: name: extract_subvector_nxv4i32_nxv8i32_zero
     ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s32>), 0
     ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 4 x s32>)
@@ -255,12 +289,12 @@ body:             |
     PseudoRET implicit $v8
 ...
 ---
-name:            extract_subvector_nxv8i64_zero
+name:            extract_subvector_nxv4i8_nxv8i64_zero
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; CHECK-LABEL: name: extract_subvector_nxv8i64_zero
+    ; CHECK-LABEL: name: extract_subvector_nxv4i8_nxv8i64_zero
     ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s64>), 0
     ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 4 x s64>)
@@ -273,12 +307,12 @@ body:             |
 
 # Extract with non-zero index
 ---
-name:            extract_subvector_nxv2i8
+name:            extract_subvector_nxv1i8_nxv2i8
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; RV32-LABEL: name: extract_subvector_nxv2i8
+    ; RV32-LABEL: name: extract_subvector_nxv1i8_nxv2i8
     ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
     ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
     ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
@@ -287,11 +321,10 @@ body:             |
     ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
     ; RV32-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 2 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[DEF]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 1 x s1>), $x0, 3
     ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 2 x s8>), 0
-    ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 1 x s8>)
-    ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 1 x s8>)
+    ; RV32-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 1 x s8>)
     ; RV32-NEXT: PseudoRET implicit $v8
     ;
-    ; RV64-LABEL: name: extract_subvector_nxv2i8
+    ; RV64-LABEL: name: extract_subvector_nxv1i8_nxv2i8
     ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
     ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
     ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
@@ -300,8 +333,7 @@ body:             |
     ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
     ; RV64-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 2 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[DEF]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 1 x s1>), $x0, 3
     ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 2 x s8>), 0
-    ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 1 x s8>)
-    ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 1 x s8>)
+    ; RV64-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 1 x s8>)
     ; RV64-NEXT: PseudoRET implicit $v8
     %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
     %1:_(<vscale x 1 x s8>) = G_EXTRACT_SUBVECTOR %0(<vscale x 2 x s8>), 1
@@ -309,12 +341,12 @@ body:             |
     PseudoRET implicit $v8
 ...
 ---
-name:            extract_subvector_nxv4i16
+name:            extract_subvector_nxv1i16_nxv4i16
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; RV32-LABEL: name: extract_subvector_nxv4i16
+    ; RV32-LABEL: name: extract_subvector_nxv1i16_nxv4i16
     ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
     ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
     ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
@@ -323,11 +355,10 @@ body:             |
     ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
     ; RV32-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 4 x s16>) = G_VSLIDEDOWN_VL [[DEF1]], [[DEF]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 1 x s1>), $x0, 3
     ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 4 x s16>), 0
-    ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s16>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 1 x s16>)
-    ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 1 x s16>)
+    ; RV32-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 1 x s16>)
     ; RV32-NEXT: PseudoRET implicit $v8
     ;
-    ; RV64-LABEL: name: extract_subvector_nxv4i16
+    ; RV64-LABEL: name: extract_subvector_nxv1i16_nxv4i16
     ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
     ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
     ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
@@ -336,8 +367,7 @@ body:             |
     ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
     ; RV64-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 4 x s16>) = G_VSLIDEDOWN_VL [[DEF1]], [[DEF]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 1 x s1>), $x0, 3
     ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 4 x s16>), 0
-    ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s16>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 1 x s16>)
-    ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 1 x s16>)
+    ; RV64-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 1 x s16>)
     ; RV64-NEXT: PseudoRET implicit $v8
     %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
     %1:_(<vscale x 1 x s16>) = G_EXTRACT_SUBVECTOR %0(<vscale x 4 x s16>), 2
@@ -345,12 +375,12 @@ body:             |
     PseudoRET implicit $v8
 ...
 ---
-name:            extract_subvector_nxv8i32
+name:            extract_subvector_nxv4i32_nxv8i32
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; CHECK-LABEL: name: extract_subvector_nxv8i32
+    ; CHECK-LABEL: name: extract_subvector_nxv4i32_nxv8i32
     ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s32>), 4
     ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 4 x s32>)
@@ -361,12 +391,12 @@ body:             |
     PseudoRET implicit $v8
 ...
 ---
-name:            extract_subvector_nxv8i64
+name:            extract_subvector_nxv2i64_nxv8i64
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; CHECK-LABEL: name: extract_subvector_nxv8i64
+    ; CHECK-LABEL: name: extract_subvector_nxv2i64_nxv8i64
     ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 8 x s64>), 2
     ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 2 x s64>)
@@ -376,4 +406,40 @@ body:             |
     $v8 = COPY %1(<vscale x 2 x s64>)
     PseudoRET implicit $v8
 ...
+---
+name:            extract_subvector_subregidx
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; RV32-LABEL: name: extract_subvector_subregidx
+    ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 16 x s32>), 0
+    ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+    ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+    ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 2 x s32>) = G_VSLIDEDOWN_VL [[DEF1]], [[EXTRACT_SUBVECTOR]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 1 x s1>), $x0, 3
+    ; RV32-NEXT: [[EXTRACT_SUBVECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 2 x s32>), 0
+    ; RV32-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR1]](<vscale x 1 x s32>)
+    ; RV32-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64-LABEL: name: extract_subvector_subregidx
+    ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 16 x s32>), 0
+    ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+    ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+    ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 2 x s32>) = G_VSLIDEDOWN_VL [[DEF1]], [[EXTRACT_SUBVECTOR]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 1 x s1>), $x0, 3
+    ; RV64-NEXT: [[EXTRACT_SUBVECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 2 x s32>), 0
+    ; RV64-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR1]](<vscale x 1 x s32>)
+    ; RV64-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 1 x s32>) = G_EXTRACT_SUBVECTOR %0(<vscale x 16 x s32>), 1
+    $v8 = COPY %1(<vscale x 1 x s32>)
+    PseudoRET implicit $v8
+...
 
>From 23c71244bed458f94512bc98540e2effc338b843 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 23 Sep 2024 13:49:43 -0700
Subject: [PATCH 05/12] fixup! respond to comments
---
 .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 17 ++------
 .../rvv/legalize-extract-subvector.mir        | 42 +++++++++++++++++--
 2 files changed, 42 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index d5e8e2d298980a..697fe03df2a33f 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -971,11 +971,8 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
       BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
       LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
       Vec = MIB.buildBitcast(BigTy, Vec).getReg(0);
-      auto E = MIB.buildExtractSubvector(LitTy, Vec, Idx);
-      if (LitTy != MRI.getType(Dst))
-        MIB.buildBitcast(Dst, E);
-      else
-        E->getOperand(0).setReg(Dst);
+      auto Extract = MIB.buildExtractSubvector(LitTy, Vec, Idx);
+      MIB.buildBitcast(Dst, Extract);
       MI.eraseFromParent();
       return true;
     } else {
@@ -1049,15 +1046,7 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
 
   // Now the vector is in the right position, extract our final subvector. This
   // should resolve to a COPY.
-  auto Extract = MIB.buildExtractSubvector(LitTy, Slidedown, 0);
-
-  // We might have bitcast from a mask type: cast back to the original type if
-  // required.
-  if (TypeSize::isKnownLT(LitTy.getSizeInBits(),
-                          MRI.getType(Dst).getSizeInBits()))
-    MIB.buildBitcast(Dst, Extract);
-  else
-    Extract->getOperand(0).setReg(Dst);
+  MIB.buildExtractSubvector(Dst, Slidedown, 0);
 
   MI.eraseFromParent();
   return true;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
index 73ea76a77bf3df..dc34d4a552d77b 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
@@ -407,12 +407,12 @@ body:             |
     PseudoRET implicit $v8
 ...
 ---
-name:            extract_subvector_subregidx
+name:            extract_subvector_subregidx_zero
 legalized:       false
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    ; RV32-LABEL: name: extract_subvector_subregidx
+    ; RV32-LABEL: name: extract_subvector_subregidx_zero
     ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
     ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 16 x s32>), 0
     ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
@@ -425,7 +425,7 @@ body:             |
     ; RV32-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR1]](<vscale x 1 x s32>)
     ; RV32-NEXT: PseudoRET implicit $v8
     ;
-    ; RV64-LABEL: name: extract_subvector_subregidx
+    ; RV64-LABEL: name: extract_subvector_subregidx_zero
     ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
     ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 16 x s32>), 0
     ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
@@ -442,4 +442,40 @@ body:             |
     $v8 = COPY %1(<vscale x 1 x s32>)
     PseudoRET implicit $v8
 ...
+---
+name:            extract_subvector_subregidx
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; RV32-LABEL: name: extract_subvector_subregidx
+    ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 16 x s32>), 2
+    ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+    ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+    ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    ; RV32-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 2 x s32>) = G_VSLIDEDOWN_VL [[DEF1]], [[EXTRACT_SUBVECTOR]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 1 x s1>), $x0, 3
+    ; RV32-NEXT: [[EXTRACT_SUBVECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 2 x s32>), 0
+    ; RV32-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR1]](<vscale x 1 x s32>)
+    ; RV32-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64-LABEL: name: extract_subvector_subregidx
+    ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 16 x s32>), 2
+    ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+    ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+    ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    ; RV64-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 2 x s32>) = G_VSLIDEDOWN_VL [[DEF1]], [[EXTRACT_SUBVECTOR]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 1 x s1>), $x0, 3
+    ; RV64-NEXT: [[EXTRACT_SUBVECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 2 x s32>), 0
+    ; RV64-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR1]](<vscale x 1 x s32>)
+    ; RV64-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 1 x s32>) = G_EXTRACT_SUBVECTOR %0(<vscale x 16 x s32>), 3
+    $v8 = COPY %1(<vscale x 1 x s32>)
+    PseudoRET implicit $v8
+...
 
>From 43765db676e7d5f439d70ca0594415fe42059253 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 23 Sep 2024 17:36:09 -0700
Subject: [PATCH 06/12] fixup! improve code based on craigs sdag changes
---
 .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 697fe03df2a33f..2be01eeff41fcc 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -953,14 +953,18 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
   Register Src = ES.getSrcVec();
   uint64_t Idx = ES.getIndexImm();
 
+  // With an index of 0 this is a cast-like subvector, which can be performed
+  // with subregister operations.
+  if (Idx == 0)
+    return true;
+
   LLT LitTy = MRI.getType(Dst);
   LLT BigTy = MRI.getType(Src);
-  Register Vec = Src;
 
   // We don't have the ability to slide mask vectors down indexed by their i1
   // elements; the smallest we can do is i8. Often we are able to bitcast to
   // equivalent i8 vectors.
-  if (LitTy.getElementType() == LLT::scalar(1) && Idx != 0) {
+  if (LitTy.getElementType() == LLT::scalar(1)) {
     auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
     auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
     if (BigTyMinElts >= 8 && LitTyMinElts >= 8) {
@@ -970,9 +974,9 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
       Idx /= 8;
       BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
       LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
-      Vec = MIB.buildBitcast(BigTy, Vec).getReg(0);
-      auto Extract = MIB.buildExtractSubvector(LitTy, Vec, Idx);
-      MIB.buildBitcast(Dst, Extract);
+      auto CastVec = MIB.buildBitcast(BigTy, Src);
+      auto PromotedExtract = MIB.buildExtractSubvector(LitTy, CastVec, Idx);
+      MIB.buildBitcast(Dst, PromotedExtract);
       MI.eraseFromParent();
       return true;
     } else {
@@ -982,7 +986,7 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
       // extend to a larger type, then truncate back down.
       LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
       LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
-      auto BigZExt = MIB.buildZExt(ExtBigTy, Vec);
+      auto BigZExt = MIB.buildZExt(ExtBigTy, Src);
       auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
       auto SplatZero = MIB.buildSplatVector(
           ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
@@ -992,11 +996,6 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
     }
   }
 
-  // With an index of 0 this is a cast-like subvector, which can be performed
-  // with subregister operations.
-  if (Idx == 0)
-    return true;
-
   // extract_subvector scales the index by vscale if the subvector is scalable,
   // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
   const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
@@ -1022,6 +1021,7 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
   // If the vector type is an LMUL-group type, extract a subvector equal to the
   // nearest full vector register type.
   LLT InterLitTy = BigTy;
+  Register Vec = Src;
   if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
                           getLMUL1Ty(BigTy).getSizeInBits())) {
     // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
@@ -1031,7 +1031,7 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
     // SDAG builds a TargetExtractSubreg. We cannot create a a Copy with SubReg
     // specified on the source Register (the equivalent) since generic virtual
     // register does not allow subregister index.
-    Vec = MIB.buildExtractSubvector(InterLitTy, Vec, Idx - RemIdx).getReg(0);
+    Vec = MIB.buildExtractSubvector(InterLitTy, Src, Idx - RemIdx).getReg(0);
   }
 
   // Slide this vector register down by the desired number of elements in order
>From e93b1bff424e514276ac747fd396f809adce73aa Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 23 Sep 2024 17:49:29 -0700
Subject: [PATCH 07/12] fixup! remove assert
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 1 -
 1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 04894492514bdd..c4458b14f36ece 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10525,7 +10525,6 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
     assert(SubRegIdx != RISCV::NoSubRegister);
     InterSubVT = getLMUL1VT(VecVT);
     Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
-    assert(false);
   }
 
   // Slide this vector register down by the desired number of elements in order
>From 2fb918288a3123b5db86063d3eee403058409f30 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 24 Sep 2024 05:54:11 -0700
Subject: [PATCH 08/12] fixup! no else after return
---
 .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 27 +++++++++----------
 1 file changed, 13 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 2be01eeff41fcc..bf4d4fbaf88bce 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -979,21 +979,20 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
       MIB.buildBitcast(Dst, PromotedExtract);
       MI.eraseFromParent();
       return true;
-    } else {
-      // We can't slide this mask vector up indexed by its i1 elements.
-      // This poses a problem when we wish to insert a scalable vector which
-      // can't be re-expressed as a larger type. Just choose the slow path and
-      // extend to a larger type, then truncate back down.
-      LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
-      LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
-      auto BigZExt = MIB.buildZExt(ExtBigTy, Src);
-      auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
-      auto SplatZero = MIB.buildSplatVector(
-          ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
-      MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
-      MI.eraseFromParent();
-      return true;
     }
+    // We can't slide this mask vector up indexed by its i1 elements.
+    // This poses a problem when we wish to insert a scalable vector which
+    // can't be re-expressed as a larger type. Just choose the slow path and
+    // extend to a larger type, then truncate back down.
+    LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
+    LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
+    auto BigZExt = MIB.buildZExt(ExtBigTy, Src);
+    auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
+    auto SplatZero = MIB.buildSplatVector(
+        ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
+    MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
+    MI.eraseFromParent();
+    return true;
   }
 
   // extract_subvector scales the index by vscale if the subvector is scalable,
>From 486e60ab761de40760b6e7e543781d78f792e370 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 30 Sep 2024 06:44:36 -0700
Subject: [PATCH 09/12] fixup! respond to another round of review
---
 llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index bf4d4fbaf88bce..4d9684d5e2f62d 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -598,8 +598,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
   SplatActions.clampScalar(1, sXLen, sXLen);
 
   getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
-      .customIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
-      .customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
+      .customIf(
+          LegalityPredicates::any(typeIsLegalBoolVec(0, BoolVecTys, ST),
+                                  typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST)));
 
   getLegacyLegalizerInfo().computeTables();
 }
>From 942bf0fe84469c8b21718eadfa049a0d432768b7 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 30 Sep 2024 09:10:39 -0700
Subject: [PATCH 10/12] fixup! move logic into legalizer helper
---
 .../llvm/CodeGen/GlobalISel/LegalizerHelper.h |  2 +
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    | 59 +++++++++++++++++++
 .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 17 ++----
 .../Target/RISCV/GISel/RISCVLegalizerInfo.h   |  3 +-
 4 files changed, 68 insertions(+), 13 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 5360850deeffd8..31c447cfaec953 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -378,6 +378,8 @@ class LegalizerHelper {
                                         LLT CastTy);
   LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
                                      LLT CastTy);
+  LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
+                                         LLT CastTy);
 
   LegalizeResult lowerConstant(MachineInstr &MI);
   LegalizeResult lowerFConstant(MachineInstr &MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e64d3f51a01111..4836cca1e9f630 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3666,6 +3666,63 @@ LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
   return Legalized;
 }
 
+/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
+///
+///  <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
+///
+/// ===>
+///
+///  <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
+///  <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i8>, N / 8
+///  <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
+                                         LLT CastTy) {
+  auto ES = dyn_cast<GExtractSubvector>(&MI);
+  if (!ES)
+    return UnableToLegalize;
+
+  if (!CastTy.isVector())
+    return UnableToLegalize;
+
+  if (TypeIdx != 0)
+    return UnableToLegalize;
+
+  Register Dst = ES->getReg(0);
+  Register Src = ES->getSrcVec();
+  uint64_t Idx = ES->getIndexImm();
+
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+  ElementCount DstTyEC = DstTy.getElementCount();
+  ElementCount SrcTyEC = SrcTy.getElementCount();
+  auto DstTyMinElts = DstTyEC.getKnownMinValue();
+  auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
+
+  if (DstTy == CastTy)
+    return Legalized;
+
+  auto AdjustAmt = CastTy.getScalarType().getSizeInBits();
+  if (DstTyMinElts < AdjustAmt || SrcTyMinElts < AdjustAmt)
+    return UnableToLegalize;
+
+  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
+      SrcTyMinElts % AdjustAmt != 0)
+    return UnableToLegalize;
+
+  Idx /= AdjustAmt;
+  SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
+
+  auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
+  auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
+  MIRBuilder.buildBitcast(Dst, PromotedES);
+
+  ES->eraseFromParent();
+  return Legalized;
+}
+
 LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
   // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
   Register DstReg = LoadMI.getDstReg();
@@ -3972,6 +4029,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
     return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
   case TargetOpcode::G_CONCAT_VECTORS:
     return bitcastConcatVector(MI, TypeIdx, CastTy);
+  case TargetOpcode::G_EXTRACT_SUBVECTOR:
+    return bitcastExtractSubvector(MI, TypeIdx, CastTy);
   default:
     return UnableToLegalize;
   }
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 4d9684d5e2f62d..1f1908eb53200a 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -945,6 +945,7 @@ static LLT getLMUL1Ty(LLT VecTy) {
 }
 
 bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
+                                                  LegalizerHelper &Helper,
                                                   MachineIRBuilder &MIB) const {
   GExtractSubvector &ES = cast<GExtractSubvector>(MI);
 
@@ -969,17 +970,9 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
     auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
     auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
     if (BigTyMinElts >= 8 && LitTyMinElts >= 8) {
-      assert(Idx % 8 == 0 && "Invalid index");
-      assert(BigTyMinElts % 8 == 0 && LitTyMinElts % 8 == 0 &&
-             "Unexpected mask vector lowering");
-      Idx /= 8;
-      BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
-      LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
-      auto CastVec = MIB.buildBitcast(BigTy, Src);
-      auto PromotedExtract = MIB.buildExtractSubvector(LitTy, CastVec, Idx);
-      MIB.buildBitcast(Dst, PromotedExtract);
-      MI.eraseFromParent();
-      return true;
+      LLT CastTy =
+          LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
+      return Helper.bitcast(MI, 0, CastTy);
     }
     // We can't slide this mask vector up indexed by its i1 elements.
     // This poses a problem when we wish to insert a scalable vector which
@@ -1123,7 +1116,7 @@ bool RISCVLegalizerInfo::legalizeCustom(
   case TargetOpcode::G_SPLAT_VECTOR:
     return legalizeSplatVector(MI, MIRBuilder);
   case TargetOpcode::G_EXTRACT_SUBVECTOR:
-    return legalizeExtractSubvector(MI, MIRBuilder);
+    return legalizeExtractSubvector(MI, Helper, MIRBuilder);
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_STORE:
     return legalizeLoadStore(MI, Helper, MIRBuilder);
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
index d2afb175ae42bb..6919bca5a9104f 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
@@ -46,7 +46,8 @@ class RISCVLegalizerInfo : public LegalizerInfo {
   bool legalizeVScale(MachineInstr &MI, MachineIRBuilder &MIB) const;
   bool legalizeExt(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const;
   bool legalizeSplatVector(MachineInstr &MI, MachineIRBuilder &MIB) const;
-  bool legalizeExtractSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const;
+  bool legalizeExtractSubvector(MachineInstr &MI, LegalizerHelper &Helper,
+                                MachineIRBuilder &MIB) const;
   bool legalizeLoadStore(MachineInstr &MI, LegalizerHelper &Helper,
                          MachineIRBuilder &MIB) const;
 };
>From 6cc5e3e1c5dcf01881ed3df49788984443bb29c5 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 30 Sep 2024 10:57:03 -0700
Subject: [PATCH 11/12] fixup! cleanup
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    | 11 +--------
 .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 23 +++++++++++--------
 2 files changed, 14 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 4836cca1e9f630..a87b3460e9e965 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3678,12 +3678,7 @@ LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
 LegalizerHelper::LegalizeResult
 LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
                                          LLT CastTy) {
-  auto ES = dyn_cast<GExtractSubvector>(&MI);
-  if (!ES)
-    return UnableToLegalize;
-
-  if (!CastTy.isVector())
-    return UnableToLegalize;
+  auto ES = cast<GExtractSubvector>(&MI);
 
   if (TypeIdx != 0)
     return UnableToLegalize;
@@ -3705,16 +3700,12 @@ LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
     return Legalized;
 
   auto AdjustAmt = CastTy.getScalarType().getSizeInBits();
-  if (DstTyMinElts < AdjustAmt || SrcTyMinElts < AdjustAmt)
-    return UnableToLegalize;
-
   if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
       SrcTyMinElts % AdjustAmt != 0)
     return UnableToLegalize;
 
   Idx /= AdjustAmt;
   SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
-
   auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
   auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
   MIRBuilder.buildBitcast(Dst, PromotedES);
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 1f1908eb53200a..0983d103bdc3e0 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -597,7 +597,20 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
 
   SplatActions.clampScalar(1, sXLen, sXLen);
 
+  LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
+    LLT DstTy = Query.Types[0];
+    LLT SrcTy = Query.Types[1];
+    return DstTy.getElementType() == LLT::scalar(1) &&
+           DstTy.getElementCount().getKnownMinValue() >= 8 &&
+           SrcTy.getElementCount().getKnownMinValue() >= 8;
+  };
   getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
+      // We don't have the ability to slide mask vectors down indexed by their
+      // i1 elements; the smallest we can do is i8. Often we are able to bitcast
+      // to equivalent i8 vectors.
+      .bitcastIf(
+          all(typeIsLegalBoolVec(0, BoolVecTys, ST), ExtractSubvecBitcastPred),
+          /*Mutation=*/nullptr)
       .customIf(
           LegalityPredicates::any(typeIsLegalBoolVec(0, BoolVecTys, ST),
                                   typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST)));
@@ -963,17 +976,7 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
   LLT LitTy = MRI.getType(Dst);
   LLT BigTy = MRI.getType(Src);
 
-  // We don't have the ability to slide mask vectors down indexed by their i1
-  // elements; the smallest we can do is i8. Often we are able to bitcast to
-  // equivalent i8 vectors.
   if (LitTy.getElementType() == LLT::scalar(1)) {
-    auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
-    auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
-    if (BigTyMinElts >= 8 && LitTyMinElts >= 8) {
-      LLT CastTy =
-          LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
-      return Helper.bitcast(MI, 0, CastTy);
-    }
     // We can't slide this mask vector up indexed by its i1 elements.
     // This poses a problem when we wish to insert a scalable vector which
     // can't be re-expressed as a larger type. Just choose the slow path and
>From 642eccca4befd3b20767cbbb83e2b242794bd657 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 30 Sep 2024 11:18:37 -0700
Subject: [PATCH 12/12] fixup! fix bitcastIf
---
 llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp          | 6 +++++-
 llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp       | 9 +++++++--
 .../legalizer/rvv/legalize-extract-subvector.mir         | 6 ++++--
 3 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a87b3460e9e965..8b01978c6ed96a 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3680,6 +3680,9 @@ LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
                                          LLT CastTy) {
   auto ES = cast<GExtractSubvector>(&MI);
 
+  if (!CastTy.isVector())
+    return UnableToLegalize;
+
   if (TypeIdx != 0)
     return UnableToLegalize;
 
@@ -3699,7 +3702,8 @@ LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
   if (DstTy == CastTy)
     return Legalized;
 
-  auto AdjustAmt = CastTy.getScalarType().getSizeInBits();
+  auto AdjustAmt = CastTy.getScalarType().getSizeInBits() /
+                   DstTy.getScalarType().getSizeInBits();
   if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
       SrcTyMinElts % AdjustAmt != 0)
     return UnableToLegalize;
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 0983d103bdc3e0..4aeb402042cf7f 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -609,8 +609,13 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
       // i1 elements; the smallest we can do is i8. Often we are able to bitcast
       // to equivalent i8 vectors.
       .bitcastIf(
-          all(typeIsLegalBoolVec(0, BoolVecTys, ST), ExtractSubvecBitcastPred),
-          /*Mutation=*/nullptr)
+          all(typeIsLegalBoolVec(0, BoolVecTys, ST),
+              typeIsLegalBoolVec(1, BoolVecTys, ST), ExtractSubvecBitcastPred),
+          [=](const LegalityQuery &Query) {
+            LLT CastTy = LLT::vector(
+                Query.Types[0].getElementCount().divideCoefficientBy(8), 8);
+            return std::pair(0, CastTy);
+          })
       .customIf(
           LegalityPredicates::any(typeIsLegalBoolVec(0, BoolVecTys, ST),
                                   typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST)));
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
index dc34d4a552d77b..5f730bf436ea5a 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
@@ -230,8 +230,10 @@ body:             |
   bb.0.entry:
     ; CHECK-LABEL: name: extract_subvector_nxv32i1_nxv64i1_zero
     ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 32 x s1>) = G_EXTRACT_SUBVECTOR [[DEF]](<vscale x 64 x s1>), 0
-    ; CHECK-NEXT: $v8 = COPY [[EXTRACT_SUBVECTOR]](<vscale x 32 x s1>)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s8>) = G_BITCAST [[DEF]](<vscale x 64 x s1>)
+    ; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_EXTRACT_SUBVECTOR [[BITCAST]](<vscale x 8 x s8>), 0
+    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 32 x s1>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 4 x s8>)
+    ; CHECK-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 32 x s1>)
     ; CHECK-NEXT: PseudoRET implicit $v8
     %0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
     %1:_(<vscale x 32 x s1>) = G_EXTRACT_SUBVECTOR %0(<vscale x 64 x s1>), 0
    
    
More information about the llvm-commits mailing list