[llvm] 3c4dbf6 - [Verifier] Fail on overrunning and invalid indices for {insert,extract} vector intrinsics

Wed Jun 23 03:34:01 PDT 2021

Author: Joe Ellis
Date: 2021-06-23T10:33:22Z
New Revision: 3c4dbf6ea9a06d0e5b460895bc5677ca7e382b4e

URL: https://github.com/llvm/llvm-project/commit/3c4dbf6ea9a06d0e5b460895bc5677ca7e382b4e
DIFF: https://github.com/llvm/llvm-project/commit/3c4dbf6ea9a06d0e5b460895bc5677ca7e382b4e.diff

LOG: [Verifier] Fail on overrunning and invalid indices for {insert,extract} vector intrinsics

With regards to overrunning, the langref (llvm/docs/LangRef.rst)
specifies:

   (llvm.experimental.vector.insert)
   Elements ``idx`` through (``idx`` + num_elements(``subvec``) - 1)
   must be valid ``vec`` indices. If this condition cannot be determined
   statically but is false at runtime, then the result vector is
   undefined.

   (llvm.experimental.vector.extract)
   Elements ``idx`` through (``idx`` + num_elements(result_type) - 1)
   must be valid vector indices. If this condition cannot be determined
   statically but is false at runtime, then the result vector is
   undefined.

For the non-mixed cases (e.g. inserting/extracting a scalable into/from
another scalable, or inserting/extracting a fixed into/from another
fixed), it is possible to statically check whether or not the above
conditions are met. This was previously missing from the verifier, and
if the conditions were found to be false, the result of the
insertion/extraction would be replaced with an undef.

With regards to invalid indices, the langref (llvm/docs/LangRef.rst)
specifies:

    (llvm.experimental.vector.insert)
    ``idx`` represents the starting element number at which ``subvec``
    will be inserted. ``idx`` must be a constant multiple of
    ``subvec``'s known minimum vector length.

    (llvm.experimental.vector.extract)
    The ``idx`` specifies the starting element number within ``vec``
    from which a subvector is extracted. ``idx`` must be a constant
    multiple of the known-minimum vector length of the result type.

Similarly, these conditions were not previously enforced in the
verifier. In some circumstances, invalid indices were permitted
silently, and in other circumstances, an undef was spawned where a
verifier error would have been preferred.

This commit adds verifier checks to enforce the constraints above.

Differential Revision: https://reviews.llvm.org/D104468

Added: 
    llvm/test/Verifier/insert-extract-intrinsics-invalid.ll

Modified: 
    llvm/lib/IR/Verifier.cpp
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/test/CodeGen/AArch64/sve-extract-vector.ll
    llvm/test/CodeGen/AArch64/sve-insert-vector.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
    llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
    llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 9322919aae584..4a7ccebd50bfa 100644

--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5316,23 +5316,63 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
     break;
   }
   case Intrinsic::experimental_vector_insert: {
-    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
-    VectorType *SubVecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
+    Value *Vec = Call.getArgOperand(0);
+    Value *SubVec = Call.getArgOperand(1);
+    Value *Idx = Call.getArgOperand(2);
+    unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
 
+    VectorType *VecTy = cast<VectorType>(Vec->getType());
+    VectorType *SubVecTy = cast<VectorType>(SubVec->getType());
+
+    ElementCount VecEC = VecTy->getElementCount();
+    ElementCount SubVecEC = SubVecTy->getElementCount();
     Assert(VecTy->getElementType() == SubVecTy->getElementType(),
            "experimental_vector_insert parameters must have the same element "
            "type.",
            &Call);
+    Assert(IdxN % SubVecEC.getKnownMinValue() == 0,
+           "experimental_vector_insert index must be a constant multiple of "
+           "the subvector's known minimum vector length.");
+
+    // If this insertion is not the 'mixed' case where a fixed vector is
+    // inserted into a scalable vector, ensure that the insertion of the
+    // subvector does not overrun the parent vector.
+    if (VecEC.isScalable() == SubVecEC.isScalable()) {
+      Assert(
+          IdxN < VecEC.getKnownMinValue() &&
+              IdxN + SubVecEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
+          "subvector operand of experimental_vector_insert would overrun the "
+          "vector being inserted into.");
+    }
     break;
   }
   case Intrinsic::experimental_vector_extract: {
+    Value *Vec = Call.getArgOperand(0);
+    Value *Idx = Call.getArgOperand(1);
+    unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
+
     VectorType *ResultTy = cast<VectorType>(Call.getType());
-    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
+    VectorType *VecTy = cast<VectorType>(Vec->getType());
+
+    ElementCount VecEC = VecTy->getElementCount();
+    ElementCount ResultEC = ResultTy->getElementCount();
 
     Assert(ResultTy->getElementType() == VecTy->getElementType(),
            "experimental_vector_extract result must have the same element "
            "type as the input vector.",
            &Call);
+    Assert(IdxN % ResultEC.getKnownMinValue() == 0,
+           "experimental_vector_extract index must be a constant multiple of "
+           "the result type's known minimum vector length.");
+
+    // If this extraction is not the 'mixed' case where a fixed vector is is
+    // extracted from a scalable vector, ensure that the extraction does not
+    // overrun the parent vector.
+    if (VecEC.isScalable() == ResultEC.isScalable()) {
+      Assert(IdxN < VecEC.getKnownMinValue() &&
+                 IdxN + ResultEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
+             "experimental_vector_extract would overrun.");
+    }
     break;
   }
   case Intrinsic::experimental_noalias_scope_decl: {

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c5ca8699c5c07..d5aa3b69fe74a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1876,13 +1876,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       unsigned SubVecNumElts = SubVecTy->getNumElements();
       unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
 
-      // The result of this call is undefined if IdxN is not a constant multiple
-      // of the SubVec's minimum vector length OR the insertion overruns Vec.
-      if (IdxN % SubVecNumElts != 0 || IdxN + SubVecNumElts > VecNumElts) {
-        replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
-        return eraseInstFromFunction(CI);
-      }
-
       // An insert that entirely overwrites Vec with SubVec is a nop.
       if (VecNumElts == SubVecNumElts) {
         replaceInstUsesWith(CI, SubVec);
@@ -1930,14 +1923,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       unsigned VecNumElts = VecTy->getNumElements();
       unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
 
-      // The result of this call is undefined if IdxN is not a constant multiple
-      // of the result type's minimum vector length OR the extraction overruns
-      // Vec.
-      if (IdxN % DstNumElts != 0 || IdxN + DstNumElts > VecNumElts) {
-        replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
-        return eraseInstFromFunction(CI);
-      }
-
       // Extracting the entirety of Vec is a nop.
       if (VecNumElts == DstNumElts) {
         replaceInstUsesWith(CI, Vec);

diff  --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
index 572e246fe300c..928407a5f919a 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -12,18 +12,25 @@ define <2 x i64> @extract_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind {
 }
 
 ; Goes through memory currently; idx != 0.
-define <2 x i64> @extract_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec) nounwind {
-; CHECK-LABEL: extract_v2i64_nxv2i64_idx1:
+define <2 x i64> @extract_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec) nounwind {
+; CHECK-LABEL: extract_v2i64_nxv2i64_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    cmp x9, #2 // =2
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #8]
+; CHECK-NEXT:    lsl x8, x8, #3
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-%retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 1)
+%retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
 ret <2 x i64> %retval
 }
 
@@ -38,18 +45,25 @@ ret <4 x i32> %retval
 }
 
 ; Goes through memory currently; idx != 0.
-define <4 x i32> @extract_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec) nounwind {
-; CHECK-LABEL: extract_v4i32_nxv4i32_idx1:
+define <4 x i32> @extract_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: extract_v4i32_nxv4i32_idx4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    cmp x9, #4 // =4
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #4]
+; CHECK-NEXT:    lsl x8, x8, #2
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 1)
+  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 4)
   ret <4 x i32> %retval
 }
 
@@ -64,18 +78,25 @@ define <8 x i16> @extract_v8i16_nxv8i16(<vscale x 8 x i16> %vec) nounwind {
 }
 
 ; Goes through memory currently; idx != 0.
-define <8 x i16> @extract_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec) nounwind {
-; CHECK-LABEL: extract_v8i16_nxv8i16_idx1:
+define <8 x i16> @extract_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec) nounwind {
+; CHECK-LABEL: extract_v8i16_nxv8i16_idx8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cnth x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    cmp x9, #8 // =8
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #2]
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 1)
+  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 8)
   ret <8 x i16> %retval
 }
 
@@ -90,18 +111,24 @@ define <16 x i8> @extract_v16i8_nxv16i8(<vscale x 16 x i8> %vec) nounwind {
 }
 
 ; Goes through memory currently; idx != 0.
-define <16 x i8> @extract_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec) nounwind {
-; CHECK-LABEL: extract_v16i8_nxv16i8_idx1:
+define <16 x i8> @extract_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec) nounwind {
+; CHECK-LABEL: extract_v16i8_nxv16i8_idx16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    rdvl x9, #1
+; CHECK-NEXT:    sub x9, x9, #1 // =1
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov w8, #16
+; CHECK-NEXT:    cmp x9, #16 // =16
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    ldur q0, [sp, #1]
+; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    ldr q0, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 1)
+  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 16)
   ret <16 x i8> %retval
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index bd5b81469a6a7..2816e97e09865 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -17,19 +17,26 @@ define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i6
   ret <vscale x 2 x i64> %retval
 }
 
-define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
-; CHECK-LABEL: insert_v2i64_nxv2i64_idx1:
+define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
+; CHECK-LABEL: insert_v2i64_nxv2i64_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    cmp x9, #2 // =2
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    lsl x8, x8, #3
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #8]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 1)
+  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 2)
   ret <vscale x 2 x i64> %retval
 }
 
@@ -49,19 +56,26 @@ define <vscale x 4 x i32> @insert_v4i32_nxv4i32(<vscale x 4 x i32> %vec, <4 x i3
   ret <vscale x 4 x i32> %retval
 }
 
-define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
-; CHECK-LABEL: insert_v4i32_nxv4i32_idx1:
+define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
+; CHECK-LABEL: insert_v4i32_nxv4i32_idx4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    cmp x9, #4 // =4
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    lsl x8, x8, #2
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #4]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 1)
+  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 4)
   ret <vscale x 4 x i32> %retval
 }
 
@@ -81,19 +95,26 @@ define <vscale x 8 x i16> @insert_v8i16_nxv8i16(<vscale x 8 x i16> %vec, <8 x i1
   ret <vscale x 8 x i16> %retval
 }
 
-define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
-; CHECK-LABEL: insert_v8i16_nxv8i16_idx1:
+define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
+; CHECK-LABEL: insert_v8i16_nxv8i16_idx8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cnth x9
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    cmp x9, #8 // =8
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #2]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 1)
+  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 8)
   ret <vscale x 8 x i16> %retval
 }
 
@@ -113,19 +134,25 @@ define <vscale x 16 x i8> @insert_v16i8_nxv16i8(<vscale x 16 x i8> %vec, <16 x i
   ret <vscale x 16 x i8> %retval
 }
 
-define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
-; CHECK-LABEL: insert_v16i8_nxv16i8_idx1:
+define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
+; CHECK-LABEL: insert_v16i8_nxv16i8_idx16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    rdvl x9, #1
+; CHECK-NEXT:    sub x9, x9, #1 // =1
+; CHECK-NEXT:    mov w8, #16
+; CHECK-NEXT:    cmp x9, #16 // =16
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
-; CHECK-NEXT:    stur q1, [sp, #1]
+; CHECK-NEXT:    str q1, [x9, x8]
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 1)
+  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 16)
   ret <vscale x 16 x i8> %retval
 }
 

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
index b60f0df194d3e..72843d45803c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
@@ -297,23 +297,6 @@ define void @extract_v8i1_nxv2i1_0(<vscale x 2 x i1> %x, <8 x i1>* %y) {
   ret void
 }
 
-define void @extract_v8i1_nxv2i1_2(<vscale x 2 x i1> %x, <8 x i1>* %y) {
-; CHECK-LABEL: extract_v8i1_nxv2i1_2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.i v25, 0
-; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vi v25, v25, 2
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vmsne.vi v25, v25, 0
-; CHECK-NEXT:    vse1.v v25, (a0)
-; CHECK-NEXT:    ret
-  %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
-  store <8 x i1> %c, <8 x i1>* %y
-  ret void
-}
-
 define void @extract_v8i1_nxv64i1_0(<vscale x 64 x i1> %x, <8 x i1>* %y) {
 ; CHECK-LABEL: extract_v8i1_nxv64i1_0:
 ; CHECK:       # %bb.0:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
index 327f025f6b845..7ca4ba21dab74 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -68,31 +68,6 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, <8 x
   ret <vscale x 8 x i32> %v
 }
 
-define <vscale x 8 x i32> @insert_nxv8i32_v8i32_4(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
-; LMULMAX2-LABEL: insert_nxv8i32_v8i32_4:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX2-NEXT:    vle32.v v28, (a0)
-; LMULMAX2-NEXT:    vsetivli zero, 12, e32, m4, tu, mu
-; LMULMAX2-NEXT:    vslideup.vi v8, v28, 4
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-LABEL: insert_nxv8i32_v8i32_4:
-; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-NEXT:    vle32.v v28, (a0)
-; LMULMAX1-NEXT:    addi a0, a0, 16
-; LMULMAX1-NEXT:    vle32.v v12, (a0)
-; LMULMAX1-NEXT:    vsetivli zero, 8, e32, m4, tu, mu
-; LMULMAX1-NEXT:    vslideup.vi v8, v28, 4
-; LMULMAX1-NEXT:    vsetivli zero, 12, e32, m4, tu, mu
-; LMULMAX1-NEXT:    vslideup.vi v8, v12, 8
-; LMULMAX1-NEXT:    ret
-  %sv = load <8 x i32>, <8 x i32>* %svp
-  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 4)
-  ret <vscale x 8 x i32> %v
-}
-
 define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
 ; LMULMAX2-LABEL: insert_nxv8i32_v8i32_8:
 ; LMULMAX2:       # %bb.0:
@@ -509,28 +484,6 @@ define <vscale x 2 x i1> @insert_nxv2i1_v4i1_0(<vscale x 2 x i1> %v, <4 x i1>* %
   ret <vscale x 2 x i1> %c
 }
 
-define <vscale x 2 x i1> @insert_nxv2i1_v4i1_6(<vscale x 2 x i1> %v, <4 x i1>* %svp) {
-; CHECK-LABEL: insert_nxv2i1_v4i1_6:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vle1.v v27, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.i v25, 0
-; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.i v26, 0
-; CHECK-NEXT:    vmv1r.v v0, v27
-; CHECK-NEXT:    vmerge.vim v26, v26, 1, v0
-; CHECK-NEXT:    vsetivli zero, 10, e8, mf4, tu, mu
-; CHECK-NEXT:    vslideup.vi v25, v26, 6
-; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmsne.vi v0, v25, 0
-; CHECK-NEXT:    ret
-  %sv = load <4 x i1>, <4 x i1>* %svp
-  %c = call <vscale x 2 x i1> @llvm.experimental.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1> %v, <4 x i1> %sv, i64 6)
-  ret <vscale x 2 x i1> %c
-}
-
 define <vscale x 8 x i1> @insert_nxv8i1_v4i1_0(<vscale x 8 x i1> %v, <8 x i1>* %svp) {
 ; CHECK-LABEL: insert_nxv8i1_v4i1_0:
 ; CHECK:       # %bb.0:

diff  --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
index ff069d112ff04..a54683943a536 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
@@ -289,7 +289,7 @@ define <vscale x 2 x i1> @dupq_neg7() #0 {
 ; CHECK-NEXT: ret
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %2 = tail call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef,
-    <2 x i64> <i64 1, i64 1>, i64 1)
+    <2 x i64> <i64 1, i64 1>, i64 2)
   %3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %2 , i64 0)
   %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
   %5 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %3, <vscale x 2 x i64> %4)

diff  --git a/llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll b/llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
index 5425f402f1c17..4263650ec59a8 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
@@ -100,29 +100,6 @@ define <3 x i32> @valid_extraction_h(<8 x i32> %vec) {
   ret <3 x i32> %1
 }
 
-; ============================================================================ ;
-; Invalid canonicalizations
-; ============================================================================ ;
-
-; Idx must be the be a constant multiple of the destination vector's length,
-; otherwise the result is undefined.
-define <4 x i32> @idx_not_constant_multiple(<8 x i32> %vec) {
-; CHECK-LABEL: @idx_not_constant_multiple(
-; CHECK-NEXT:    ret <4 x i32> undef
-;
-  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
-  ret <4 x i32> %1
-}
-
-; If the extraction overruns the vector, the result is undefined.
-define <10 x i32> @extract_overrun(<8 x i32> %vec) {
-; CHECK-LABEL: @extract_overrun(
-; CHECK-NEXT:    ret <10 x i32> undef
-;
-  %1 = call <10 x i32> @llvm.experimental.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 0)
-  ret <10 x i32> %1
-}
-
 ; ============================================================================ ;
 ; Scalable cases
 ; ============================================================================ ;

diff  --git a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
index e504d3d0bb1b5..2c0056d799a19 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
@@ -108,29 +108,6 @@ define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) {
   ret <8 x i32> %1
 }
 
-; ============================================================================ ;
-; Invalid canonicalizations
-; ============================================================================ ;
-
-; Idx must be the be a constant multiple of the subvector's minimum vector
-; length, otherwise the result is undefined.
-define <8 x i32> @idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
-; CHECK-LABEL: @idx_not_constant_multiple(
-; CHECK-NEXT:    ret <8 x i32> undef
-;
-  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
-  ret <8 x i32> %1
-}
-
-; If the insertion overruns the vector, the result is undefined.
-define <8 x i32> @insert_overrun(<8 x i32> %vec, <8 x i32> %subvec) {
-; CHECK-LABEL: @insert_overrun(
-; CHECK-NEXT:    ret <8 x i32> undef
-;
-  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 4)
-  ret <8 x i32> %1
-}
-
 ; ============================================================================ ;
 ; Scalable cases
 ; ============================================================================ ;

diff  --git a/llvm/test/Verifier/insert-extract-intrinsics-invalid.ll b/llvm/test/Verifier/insert-extract-intrinsics-invalid.ll
new file mode 100644
index 0000000000000..e7bf590857138
--- /dev/null
+++ b/llvm/test/Verifier/insert-extract-intrinsics-invalid.ll
@@ -0,0 +1,72 @@
+; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s
+
+;
+; Test that extractions/insertion indices are validated.
+;
+
+; CHECK: experimental_vector_extract index must be a constant multiple of the result type's known minimum vector length.
+define <4 x i32> @extract_idx_not_constant_multiple(<8 x i32> %vec) {
+  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
+  ret <4 x i32> %1
+}
+
+; CHECK: experimental_vector_insert index must be a constant multiple of the subvector's known minimum vector length.
+define <8 x i32> @insert_idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
+  ret <8 x i32> %1
+}
+
+;
+; Test that extractions/insertions which 'overrun' are captured.
+;
+
+; CHECK: experimental_vector_extract would overrun.
+define <3 x i32> @extract_overrun_fixed_fixed(<8 x i32> %vec) {
+  %1 = call <3 x i32> @llvm.experimental.vector.extract.v8i32.v3i32(<8 x i32> %vec, i64 6)
+  ret <3 x i32> %1
+}
+
+; CHECK: experimental_vector_extract would overrun.
+define <vscale x 3 x i32> @extract_overrun_scalable_scalable(<vscale x 8 x i32> %vec) {
+  %1 = call <vscale x 3 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv3i32(<vscale x 8 x i32> %vec, i64 6)
+  ret <vscale x 3 x i32> %1
+}
+
+; We cannot statically check whether or not an extraction of a fixed vector
+; from a scalable vector would overrun, because we can't compare the sizes of
+; the two. Therefore, this function should not raise verifier errors.
+; CHECK-NOT: experimental_vector_extract
+define <3 x i32> @extract_overrun_scalable_fixed(<vscale x 8 x i32> %vec) {
+  %1 = call <3 x i32> @llvm.experimental.vector.extract.nxv8i32.v3i32(<vscale x 8 x i32> %vec, i64 6)
+  ret <3 x i32> %1
+}
+
+; CHECK: subvector operand of experimental_vector_insert would overrun the vector being inserted into.
+define <8 x i32> @insert_overrun_fixed_fixed(<8 x i32> %vec, <3 x i32> %subvec) {
+  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 6)
+  ret <8 x i32> %1
+}
+
+; CHECK: subvector operand of experimental_vector_insert would overrun the vector being inserted into.
+define <vscale x 8 x i32> @insert_overrun_scalable_scalable(<vscale x 8 x i32> %vec, <vscale x 3 x i32> %subvec) {
+  %1 = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv3i32(<vscale x 8 x i32> %vec, <vscale x 3 x i32> %subvec, i64 6)
+  ret <vscale x 8 x i32> %1
+}
+
+; We cannot statically check whether or not an insertion of a fixed vector into
+; a scalable vector would overrun, because we can't compare the sizes of the
+; two. Therefore, this function should not raise verifier errors.
+; CHECK-NOT: experimental_vector_insert
+define <vscale x 8 x i32> @insert_overrun_scalable_fixed(<vscale x 8 x i32> %vec, <3 x i32> %subvec) {
+  %1 = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.v3i32(<vscale x 8 x i32> %vec, <3 x i32> %subvec, i64 6)
+  ret <vscale x 8 x i32> %1
+}
+
+declare <vscale x 3 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv3i32(<vscale x 8 x i32>, i64)
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv3i32(<vscale x 8 x i32>, <vscale x 3 x i32>, i64)
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.v3i32(<vscale x 8 x i32>, <3 x i32>, i64)
+declare <3 x i32> @llvm.experimental.vector.extract.nxv8i32.v3i32(<vscale x 8 x i32>, i64)
+declare <3 x i32> @llvm.experimental.vector.extract.v8i32.v3i32(<8 x i32>, i64)
+declare <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32>, i64)
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32>, <3 x i32>, i64)
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)