[clang] d261d3e - [clang][NFC][SVE] Add tests for operators on VLS vectors

David Truby via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 8 08:06:57 PDT 2022


Author: David Truby
Date: 2022-06-08T16:06:46+01:00
New Revision: d261d3e4a76e9b445fc8d126439115c152d869f3

URL: https://github.com/llvm/llvm-project/commit/d261d3e4a76e9b445fc8d126439115c152d869f3
DIFF: https://github.com/llvm/llvm-project/commit/d261d3e4a76e9b445fc8d126439115c152d869f3.diff

LOG: [clang][NFC][SVE] Add tests for operators on VLS vectors

This patch adds codegen tests for operators on SVE VLS (vector-length-specific) vector types.
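
For reference, each test applies a C operator to a fixed-width SVE type
declared with the arm_sve_vector_bits attribute. A minimal sketch of the
pattern under test (the name scale_add is illustrative and not part of the
patch; it assumes compilation with -msve-vector-bits=512 so the types are
512 bits wide):

    #include <arm_sve.h>

    typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));

    fixed_int32_t scale_add(fixed_int32_t x, fixed_int32_t y, int32_t k) {
      /* k is splatted across the 16 lanes; * and + act element-wise */
      return x * k + y;
    }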

Added: 
    clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
    clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c
    clang/test/CodeGen/aarch64-sve-vls-compare-ops.c
    clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
    clang/test/CodeGen/aarch64-sve-vls-subscript-ops.c

Modified: 
    

Removed: 
    


################################################################################
diff --git a/clang/test/CodeGen/aarch64-sve-vls-arith-ops.c b/clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
new file mode 100644
index 0000000000000..38b1f970fdc0d
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
@@ -0,0 +1,1984 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \
+// RUN: -fallow-half-arguments-and-returns -disable-O0-optnone -mvscale-min=4 -mvscale-max=4 \
+// RUN:  -emit-llvm -o - %s | opt -S -sroa | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
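+
+// -disable-O0-optnone drops the optnone attribute that -O0 would otherwise
+// add, so the opt -sroa run can strip away the argument allocas and keep the
+// autogenerated CHECK lines below readable. Pinning -mvscale-min/-mvscale-max
+// to 4 fixes vscale at 4, i.e. 4 x 128 = 512 bits per vector, matching N.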
+
+#include <arm_sve.h>
+
+#define N 512
+
+typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svuint8_t fixed_uint8_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint32_t fixed_uint32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint64_t fixed_uint64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svfloat16_t fixed_float16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svfloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
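+
+// At N=512 these typedefs fix the lane counts at 64 x i8, 32 x i16, 16 x i32
+// and 8 x i64 (likewise for the float types), which is why the CHECK lines
+// below extract e.g. <64 x i8> from <vscale x 16 x i8>.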
+
+// ADDITION
+
+// CHECK-LABEL: @add_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t add_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t add_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t add_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t add_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t add_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t add_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t add_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t add_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <32 x float> [[CONV]], [[CONV2]]
+// CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[ADD]] to <32 x half>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
+//
+fixed_float16_t add_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32_t add_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64_t add_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_inplace_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t add_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t add_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t add_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t add_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t add_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t add_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t add_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t add_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <32 x float> [[CONV2]], [[CONV]]
+// CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[ADD]] to <32 x half>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
+//
+fixed_float16_t add_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32_t add_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64_t add_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_scalar_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t add_scalar_i8(fixed_int8_t a, int8_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t add_scalar_i16(fixed_int16_t a, int16_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t add_scalar_i32(fixed_int32_t a, int32_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t add_scalar_i64(fixed_int64_t a, int64_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t add_scalar_u8(fixed_uint8_t a, uint8_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t add_scalar_u16(fixed_uint16_t a, uint16_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t add_scalar_u32(fixed_uint32_t a, uint32_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t add_scalar_u64(fixed_uint64_t a, uint64_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <32 x half> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
+//
+fixed_float16_t add_scalar_f16(fixed_float16_t a, __fp16 b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <16 x float> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32_t add_scalar_f32(fixed_float32_t a, float b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <8 x double> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[ADD]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64_t add_scalar_f64(fixed_float64_t a, double b) {
+  return a + b;
+}
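+
+// Note how the scalar variants above lower the broadcast: the scalar operand
+// is written into lane 0 with an insertelement and then splatted with a
+// shufflevector on a zeroinitializer mask before the element-wise operation.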
+
+// SUBTRACTION
+
+// CHECK-LABEL: @sub_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t sub_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t sub_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t sub_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t sub_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t sub_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t sub_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t sub_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t sub_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[SUB:%.*]] = fsub <32 x float> [[CONV]], [[CONV2]]
+// CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[SUB]] to <32 x half>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
+//
+fixed_float16_t sub_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = fsub <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32_t sub_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = fsub <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64_t sub_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t sub_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a -= b;
+}
+
+// CHECK-LABEL: @sub_inplace_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t sub_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a -= b;
+}
+
+// CHECK-LABEL: @sub_inplace_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t sub_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a -= b;
+}
+
+// CHECK-LABEL: @sub_inplace_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t sub_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a -= b;
+}
+
+// CHECK-LABEL: @sub_inplace_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t sub_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a -= b;
+}
+
+// CHECK-LABEL: @sub_inplace_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t sub_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a -= b;
+}
+
+// CHECK-LABEL: @sub_inplace_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t sub_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a -= b;
+}
+
+// CHECK-LABEL: @sub_inplace_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t sub_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a -= b;
+}
+
+// CHECK-LABEL: @sub_inplace_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[SUB:%.*]] = fsub <32 x float> [[CONV2]], [[CONV]]
+// CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[SUB]] to <32 x half>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
+//
+fixed_float16_t sub_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a -= b;
+}
+
+// CHECK-LABEL: @sub_inplace_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = fsub <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32_t sub_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a -= b;
+}
+
+// CHECK-LABEL: @sub_inplace_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SUB:%.*]] = fsub <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64_t sub_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a -= b;
+}
+
+// CHECK-LABEL: @sub_scalar_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t sub_scalar_i8(fixed_int8_t a, int8_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t sub_scalar_i16(fixed_int16_t a, int16_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t sub_scalar_i32(fixed_int32_t a, int32_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t sub_scalar_i64(fixed_int64_t a, int64_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t sub_scalar_u8(fixed_uint8_t a, uint8_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t sub_scalar_u16(fixed_uint16_t a, uint16_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t sub_scalar_u32(fixed_uint32_t a, uint32_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t sub_scalar_u64(fixed_uint64_t a, uint64_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = fsub <32 x half> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
+//
+fixed_float16_t sub_scalar_f16(fixed_float16_t a, __fp16 b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = fsub <16 x float> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32_t sub_scalar_f32(fixed_float32_t a, float b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = fsub <8 x double> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[SUB]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64_t sub_scalar_f64(fixed_float64_t a, double b) {
+  return a - b;
+}
+
+// MULTIPLICATION
+
+// CHECK-LABEL: @mul_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t mul_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t mul_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t mul_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t mul_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t mul_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t mul_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t mul_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t mul_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[MUL:%.*]] = fmul <32 x float> [[CONV]], [[CONV2]]
+// CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[MUL]] to <32 x half>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
+//
+fixed_float16_t mul_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a * b;
+}
+
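+// For __fp16 elements the operands are promoted to float, the arithmetic is
+// performed as <32 x float>, and the result is truncated back to <32 x half>,
+// hence the fpext/fptrunc pairs bracketing the fmul above.
+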
+// CHECK-LABEL: @mul_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = fmul <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32_t mul_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = fmul <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64_t mul_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a * b;
+}
+
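+// The *_inplace_* variants use compound assignment; the temporary store and
+// reload that implies is optimized away before the checks are taken, so the
+// generated IR is identical to the plain operator forms above.
+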
+// CHECK-LABEL: @mul_inplace_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t mul_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a *= b;
+}
+
+// CHECK-LABEL: @mul_inplace_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t mul_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a *= b;
+}
+
+// CHECK-LABEL: @mul_inplace_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t mul_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a *= b;
+}
+
+// CHECK-LABEL: @mul_inplace_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t mul_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a *= b;
+}
+
+// CHECK-LABEL: @mul_inplace_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t mul_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a *= b;
+}
+
+// CHECK-LABEL: @mul_inplace_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t mul_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a *= b;
+}
+
+// CHECK-LABEL: @mul_inplace_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t mul_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a *= b;
+}
+
+// CHECK-LABEL: @mul_inplace_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t mul_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a *= b;
+}
+
+// CHECK-LABEL: @mul_inplace_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[MUL:%.*]] = fmul <32 x float> [[CONV]], [[CONV2]]
+// CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[MUL]] to <32 x half>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
+//
+fixed_float16_t mul_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a *= b;
+}
+
+// CHECK-LABEL: @mul_inplace_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = fmul <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32_t mul_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a *= b;
+}
+
+// CHECK-LABEL: @mul_inplace_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[MUL:%.*]] = fmul <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64_t mul_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a *= b;
+}
+
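+// With a scalar right-hand side, the scalar is broadcast first: an
+// insertelement into lane 0 of a poison vector followed by a shufflevector
+// with a zeroinitializer mask, and the resulting splat feeds the vector op.
+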
+// CHECK-LABEL: @mul_scalar_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t mul_scalar_i8(fixed_int8_t a, int8_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t mul_scalar_i16(fixed_int16_t a, int16_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t mul_scalar_i32(fixed_int32_t a, int32_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t mul_scalar_i64(fixed_int64_t a, int64_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t mul_scalar_u8(fixed_uint8_t a, uint8_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t mul_scalar_u16(fixed_uint16_t a, uint16_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t mul_scalar_u32(fixed_uint32_t a, uint32_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t mul_scalar_u64(fixed_uint64_t a, uint64_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = fmul <32 x half> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
+//
+fixed_float16_t mul_scalar_f16(fixed_float16_t a, __fp16 b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = fmul <16 x float> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32_t mul_scalar_f32(fixed_float32_t a, float b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = fmul <8 x double> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[MUL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64_t mul_scalar_f64(fixed_float64_t a, double b) {
+  return a * b;
+}
+
+// DIVISION
+
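+// Signed element types lower to sdiv and unsigned ones to udiv; the
+// floating-point cases lower to fdiv.
+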
+// CHECK-LABEL: @div_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t div_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t div_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t div_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t div_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t div_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t div_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t div_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t div_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv <32 x float> [[CONV]], [[CONV2]]
+// CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[DIV]] to <32 x half>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
+//
+fixed_float16_t div_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32_t div_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64_t div_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_inplace_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t div_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a /= b;
+}
+
+// CHECK-LABEL: @div_inplace_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t div_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a /= b;
+}
+
+// CHECK-LABEL: @div_inplace_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t div_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a /= b;
+}
+
+// CHECK-LABEL: @div_inplace_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t div_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a /= b;
+}
+
+// CHECK-LABEL: @div_inplace_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t div_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a /= b;
+}
+
+// CHECK-LABEL: @div_inplace_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t div_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a /= b;
+}
+
+// CHECK-LABEL: @div_inplace_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t div_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a /= b;
+}
+
+// CHECK-LABEL: @div_inplace_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t div_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a /= b;
+}
+
+// CHECK-LABEL: @div_inplace_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv <32 x float> [[CONV]], [[CONV2]]
+// CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[DIV]] to <32 x half>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
+//
+fixed_float16_t div_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a /= b;
+}
+
+// CHECK-LABEL: @div_inplace_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32_t div_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a /= b;
+}
+
+// CHECK-LABEL: @div_inplace_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64_t div_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a /= b;
+}
+
+// CHECK-LABEL: @div_scalar_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <64 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t div_scalar_i8(fixed_int8_t a, int8_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <32 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t div_scalar_i16(fixed_int16_t a, int16_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t div_scalar_i32(fixed_int32_t a, int32_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t div_scalar_i64(fixed_int64_t a, int64_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <64 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t div_scalar_u8(fixed_uint8_t a, uint8_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <32 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t div_scalar_u16(fixed_uint16_t a, uint16_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t div_scalar_u32(fixed_uint32_t a, uint32_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t div_scalar_u64(fixed_uint64_t a, uint64_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv <32 x half> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
+//
+fixed_float16_t div_scalar_f16(fixed_float16_t a, __fp16 b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv <16 x float> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32_t div_scalar_f32(fixed_float32_t a, float b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv <8 x double> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[DIV]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64_t div_scalar_f64(fixed_float64_t a, double b) {
+  return a / b;
+}
+
+// REMAINDER
+
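+// Signed remainders lower to srem and unsigned ones to urem. There are no
+// floating-point variants because C's % operator is not defined for
+// floating-point operands.
+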
+// CHECK-LABEL: @rem_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = srem <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t rem_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = srem <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t rem_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = srem <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t rem_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = srem <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t rem_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = urem <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t rem_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = urem <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t rem_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = urem <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t rem_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = urem <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t rem_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_inplace_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = srem <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t rem_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a %= b;
+}
+
+// CHECK-LABEL: @rem_inplace_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = srem <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t rem_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a %= b;
+}
+
+// CHECK-LABEL: @rem_inplace_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = srem <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t rem_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a %= b;
+}
+
+// CHECK-LABEL: @rem_inplace_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = srem <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t rem_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a %= b;
+}
+
+// CHECK-LABEL: @rem_inplace_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = urem <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t rem_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a %= b;
+}
+
+// CHECK-LABEL: @rem_inplace_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = urem <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t rem_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_inplace_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = urem <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t rem_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_inplace_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[REM:%.*]] = urem <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t rem_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = srem <64 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t rem_scalar_i8(fixed_int8_t a, int8_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = srem <32 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t rem_scalar_i16(fixed_int16_t a, int16_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = srem <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t rem_scalar_i32(fixed_int32_t a, int32_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = srem <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t rem_scalar_i64(fixed_int64_t a, int64_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <64 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t rem_scalar_u8(fixed_uint8_t a, uint8_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <32 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t rem_scalar_u16(fixed_uint16_t a, uint16_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t rem_scalar_u32(fixed_uint32_t a, uint32_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t rem_scalar_u64(fixed_uint64_t a, uint64_t b) {
+  return a % b;
+}
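
The rem_scalar_* checks above pin down how a scalar right-hand side is
handled: it is broadcast with an insertelement/shufflevector splat, and the
modulus is then a lane-wise srem/urem on the full VLS vector. A rough
user-level equivalent written out by hand (a sketch, not part of the patch;
svdup_n_s8 is the ACLE splat intrinsic, and the typedef assumes
-msve-vector-bits=512 so that the attribute is valid):

#include <arm_sve.h>

typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(512)));

fixed_int8_t rem_scalar_by_hand(fixed_int8_t a, int8_t b) {
  fixed_int8_t bs = svdup_n_s8(b); // broadcast b into all 64 i8 lanes
  return a % bs;                   // lane-wise srem, as in rem_scalar_i8
}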

diff  --git a/clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c b/clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c
new file mode 100644
index 0000000000000..b77d2bd63c3bd
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c
@@ -0,0 +1,468 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \
+// RUN: -fallow-half-arguments-and-returns -disable-O0-optnone -mvscale-min=4 -mvscale-max=4 \
+// RUN:  -emit-llvm -o - %s | opt -S -sroa | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#define N 512
+
+typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svuint8_t fixed_uint8_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint32_t fixed_uint32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint64_t fixed_uint64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svfloat16_t fixed_float16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svfloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
+
+// AND
+
+// CHECK-LABEL: @and_bool(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
+// CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <8 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[AND]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
+//
+fixed_bool_t and_bool(fixed_bool_t a, fixed_bool_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t and_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t and_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t and_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t and_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t and_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t and_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t and_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t and_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a & b;
+}
+
+// OR
+
+// CHECK-LABEL: @or_bool(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
+// CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <8 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[OR]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
+//
+fixed_bool_t or_bool(fixed_bool_t a, fixed_bool_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t or_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t or_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t or_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t or_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t or_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t or_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t or_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t or_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a | b;
+}
+
+// XOR
+
+// CHECK-LABEL: @xor_bool(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
+// CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
+// CHECK-NEXT:    [[XOR:%.*]] = xor <8 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[XOR]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
+//
+fixed_bool_t xor_bool(fixed_bool_t a, fixed_bool_t b) {
+  return a ^ b;
+}
+
+// CHECK-LABEL: @xor_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[XOR:%.*]] = xor <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[XOR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t xor_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a ^ b;
+}
+
+// CHECK-LABEL: @xor_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[XOR:%.*]] = xor <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[XOR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t xor_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a ^ b;
+}
+
+// CHECK-LABEL: @xor_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[XOR:%.*]] = xor <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[XOR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t xor_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a ^ b;
+}
+
+// CHECK-LABEL: @xor_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[XOR:%.*]] = xor <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[XOR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t xor_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a ^ b;
+}
+
+// CHECK-LABEL: @xor_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[XOR:%.*]] = xor <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[XOR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t xor_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a ^ b;
+}
+
+// CHECK-LABEL: @xor_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[XOR:%.*]] = xor <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[XOR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t xor_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a ^ b;
+}
+
+// CHECK-LABEL: @xor_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[XOR:%.*]] = xor <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[XOR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t xor_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a ^ b;
+}
+
+// CHECK-LABEL: @xor_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[XOR:%.*]] = xor <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[XOR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t xor_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a ^ b;
+}
+
+// NEG
+
+// CHECK-LABEL: @neg_bool(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <8 x i8> [[A]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[NEG]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+//
+fixed_bool_t neg_bool(fixed_bool_t a) {
+  return ~a;
+}
+
+// CHECK-LABEL: @neg_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <64 x i8> [[A]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[NEG]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t neg_i8(fixed_int8_t a) {
+  return ~a;
+}
+
+// CHECK-LABEL: @neg_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <32 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[NEG]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t neg_i16(fixed_int16_t a) {
+  return ~a;
+}
+
+// CHECK-LABEL: @neg_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <16 x i32> [[A]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[NEG]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t neg_i32(fixed_int32_t a) {
+  return ~a;
+}
+
+// CHECK-LABEL: @neg_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <8 x i64> [[A]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[NEG]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t neg_i64(fixed_int64_t a) {
+  return ~a;
+}
+
+// CHECK-LABEL: @neg_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <64 x i8> [[A]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[NEG]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t neg_u8(fixed_uint8_t a) {
+  return ~a;
+}
+
+// CHECK-LABEL: @neg_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <32 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[NEG]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t neg_u16(fixed_uint16_t a) {
+  return ~a;
+}
+
+// CHECK-LABEL: @neg_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <16 x i32> [[A]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[NEG]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t neg_u32(fixed_uint32_t a) {
+  return ~a;
+}
+
+// CHECK-LABEL: @neg_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <8 x i64> [[A]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[NEG]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t neg_u64(fixed_uint64_t a) {
+  return ~a;
+}
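
Two details worth noting in the bitwise-ops file above. First, fixed_bool_t is
passed as <vscale x 16 x i1> and bitcast to a <vscale x 2 x i8> container, so
for N=512 the operators act on an 8-byte <8 x i8> fixed vector, one predicate
bit per byte lane. Second, the NEG section exercises bitwise complement (~),
which lowers to an xor with an all-ones splat. A small sketch combining the
two, assuming -msve-vector-bits=512 to match the typedefs (not part of the
patch; nand_bool is a hypothetical helper):

#include <arm_sve.h>

typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(512)));

fixed_bool_t nand_bool(fixed_bool_t a, fixed_bool_t b) {
  return ~(a & b); // & and ~ each lower to i8 ops on the 8-byte container
}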

diff  --git a/clang/test/CodeGen/aarch64-sve-vls-compare-ops.c b/clang/test/CodeGen/aarch64-sve-vls-compare-ops.c
new file mode 100644
index 0000000000000..f6c6672336dde
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-vls-compare-ops.c
@@ -0,0 +1,1010 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \
+// RUN: -fallow-half-arguments-and-returns -disable-O0-optnone -mvscale-min=4 -mvscale-max=4 \
+// RUN:  -emit-llvm -o - %s | opt -S -sroa | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#define N 512
+
+typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svuint8_t fixed_uint8_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint32_t fixed_uint32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint64_t fixed_uint64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svfloat16_t fixed_float16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svfloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
+
+// EQ
+
+// CHECK-LABEL: @eq_bool(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
+// CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq <8 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
+//
+fixed_bool_t eq_bool(fixed_bool_t a, fixed_bool_t b) {
+  return a == b;
+}
+
+// CHECK-LABEL: @eq_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t eq_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a == b;
+}
+
+// CHECK-LABEL: @eq_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t eq_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a == b;
+}
+
+// CHECK-LABEL: @eq_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t eq_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a == b;
+}
+
+// CHECK-LABEL: @eq_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t eq_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a == b;
+}
+
+// CHECK-LABEL: @eq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t eq_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a == b;
+}
+
+// CHECK-LABEL: @eq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t eq_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a == b;
+}
+
+// CHECK-LABEL: @eq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t eq_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a == b;
+}
+
+// CHECK-LABEL: @eq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t eq_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a == b;
+}
+
+// CHECK-LABEL: @eq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <32 x float> [[CONV]], [[CONV2]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i32>
+// CHECK-NEXT:    [[CONV3:%.*]] = trunc <32 x i32> [[SEXT]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t eq_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a == b;
+}
+
+// CHECK-LABEL: @eq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t eq_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a == b;
+}
+
+// CHECK-LABEL: @eq_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t eq_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a == b;
+}
+
+// NEQ
+
+// CHECK-LABEL: @neq_bool(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
+// CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
+//
+fixed_bool_t neq_bool(fixed_bool_t a, fixed_bool_t b) {
+  return a != b;
+}
+
+// CHECK-LABEL: @neq_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ne <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t neq_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a != b;
+}
+
+// CHECK-LABEL: @neq_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ne <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t neq_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a != b;
+}
+
+// CHECK-LABEL: @neq_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ne <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t neq_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a != b;
+}
+
+// CHECK-LABEL: @neq_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t neq_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a != b;
+}
+
+// CHECK-LABEL: @neq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ne <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t neq_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a != b;
+}
+
+// CHECK-LABEL: @neq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ne <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t neq_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a != b;
+}
+
+// CHECK-LABEL: @neq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ne <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t neq_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a != b;
+}
+
+// CHECK-LABEL: @neq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t neq_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a != b;
+}
+
+// CHECK-LABEL: @neq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp une <32 x float> [[CONV]], [[CONV2]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i32>
+// CHECK-NEXT:    [[CONV3:%.*]] = trunc <32 x i32> [[SEXT]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t neq_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a != b;
+}
+
+// CHECK-LABEL: @neq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp une <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t neq_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a != b;
+}
+
+// CHECK-LABEL: @neq_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp une <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t neq_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a != b;
+}
+
+// LT
+
+// CHECK-LABEL: @lt_bool(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
+// CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <8 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
+//
+fixed_bool_t lt_bool(fixed_bool_t a, fixed_bool_t b) {
+  return a < b;
+}
+
+// CHECK-LABEL: @lt_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp slt <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t lt_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a < b;
+}
+
+// CHECK-LABEL: @lt_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp slt <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t lt_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a < b;
+}
+
+// CHECK-LABEL: @lt_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp slt <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t lt_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a < b;
+}
+
+// CHECK-LABEL: @lt_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp slt <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t lt_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a < b;
+}
+
+// CHECK-LABEL: @lt_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t lt_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a < b;
+}
+
+// CHECK-LABEL: @lt_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t lt_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a < b;
+}
+
+// CHECK-LABEL: @lt_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t lt_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a < b;
+}
+
+// CHECK-LABEL: @lt_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t lt_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a < b;
+}
+
+// CHECK-LABEL: @lt_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <32 x float> [[CONV]], [[CONV2]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i32>
+// CHECK-NEXT:    [[CONV3:%.*]] = trunc <32 x i32> [[SEXT]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t lt_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a < b;
+}
+
+// CHECK-LABEL: @lt_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t lt_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a < b;
+}
+
+// CHECK-LABEL: @lt_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t lt_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a < b;
+}
+
+// LEQ
+
+// CHECK-LABEL: @leq_bool(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
+// CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ule <8 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
+//
+fixed_bool_t leq_bool(fixed_bool_t a, fixed_bool_t b) {
+  return a <= b;
+}
+
+// CHECK-LABEL: @leq_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sle <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t leq_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a <= b;
+}
+
+// CHECK-LABEL: @leq_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sle <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t leq_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a <= b;
+}
+
+// CHECK-LABEL: @leq_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sle <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t leq_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a <= b;
+}
+
+// CHECK-LABEL: @leq_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sle <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t leq_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a <= b;
+}
+
+// CHECK-LABEL: @leq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ule <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t leq_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a <= b;
+}
+
+// CHECK-LABEL: @leq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ule <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t leq_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a <= b;
+}
+
+// CHECK-LABEL: @leq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ule <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t leq_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a <= b;
+}
+
+// CHECK-LABEL: @leq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ule <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t leq_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a <= b;
+}
+
+// CHECK-LABEL: @leq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp ole <32 x float> [[CONV]], [[CONV2]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i32>
+// CHECK-NEXT:    [[CONV3:%.*]] = trunc <32 x i32> [[SEXT]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t leq_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a <= b;
+}
+
+// CHECK-LABEL: @leq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp ole <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t leq_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a <= b;
+}
+
+// CHECK-LABEL: @leq_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp ole <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t leq_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a <= b;
+}
+
+// GT
+
+// CHECK-LABEL: @gt_bool(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
+// CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <8 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
+//
+fixed_bool_t gt_bool(fixed_bool_t a, fixed_bool_t b) {
+  return a > b;
+}
+
+// CHECK-LABEL: @gt_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t gt_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a > b;
+}
+
+// CHECK-LABEL: @gt_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t gt_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a > b;
+}
+
+// CHECK-LABEL: @gt_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t gt_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a > b;
+}
+
+// CHECK-LABEL: @gt_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t gt_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a > b;
+}
+
+// CHECK-LABEL: @gt_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t gt_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a > b;
+}
+
+// CHECK-LABEL: @gt_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t gt_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a > b;
+}
+
+// CHECK-LABEL: @gt_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t gt_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a > b;
+}
+
+// CHECK-LABEL: @gt_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t gt_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a > b;
+}
+
+// CHECK-LABEL: @gt_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt <32 x float> [[CONV]], [[CONV2]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i32>
+// CHECK-NEXT:    [[CONV3:%.*]] = trunc <32 x i32> [[SEXT]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t gt_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a > b;
+}
+
+// CHECK-LABEL: @gt_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t gt_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a > b;
+}
+
+// CHECK-LABEL: @gt_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t gt_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a > b;
+}
+
+// GEQ
+
+// CHECK-LABEL: @geq_bool(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
+// CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp uge <8 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
+// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
+//
+fixed_bool_t geq_bool(fixed_bool_t a, fixed_bool_t b) {
+  return a >= b;
+}
+
+// CHECK-LABEL: @geq_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sge <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t geq_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a >= b;
+}
+
+// CHECK-LABEL: @geq_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sge <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t geq_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a >= b;
+}
+
+// CHECK-LABEL: @geq_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sge <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t geq_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a >= b;
+}
+
+// CHECK-LABEL: @geq_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sge <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t geq_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a >= b;
+}
+
+// CHECK-LABEL: @geq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp uge <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t geq_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a >= b;
+}
+
+// CHECK-LABEL: @geq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp uge <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t geq_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a >= b;
+}
+
+// CHECK-LABEL: @geq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp uge <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t geq_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a >= b;
+}
+
+// CHECK-LABEL: @geq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp uge <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t geq_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a >= b;
+}
+
+// CHECK-LABEL: @geq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = fpext <32 x half> [[A]] to <32 x float>
+// CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp oge <32 x float> [[CONV]], [[CONV2]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i32>
+// CHECK-NEXT:    [[CONV3:%.*]] = trunc <32 x i32> [[SEXT]] to <32 x i16>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[CONV3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t geq_f16(fixed_float16_t a, fixed_float16_t b) {
+  return a >= b;
+}
+
+// CHECK-LABEL: @geq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp oge <16 x float> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t geq_f32(fixed_float32_t a, fixed_float32_t b) {
+  return a >= b;
+}
+
+// CHECK-LABEL: @geq_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CMP:%.*]] = fcmp oge <8 x double> [[A]], [[B]]
+// CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t geq_f64(fixed_float64_t a, fixed_float64_t b) {
+  return a >= b;
+}

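The comparison checks above all share one shape: the C-level result of a
vector comparison follows the GNU vector-extension convention, so each lane
is -1 (all bits set) when the predicate holds and 0 otherwise, which is why
every CHECK block sign-extends the <N x i1> compare result back to the
element width before the scalable insert. As a minimal standalone sketch of
why that convention is useful (illustrative only, not part of this patch;
it assumes the same arm_sve_vector_bits(512) setup as the tests):

#include <arm_sve.h>

typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));

/* Branchless per-lane minimum: (a < b) yields -1/0 per lane, so it can be
   used directly as a bitmask with the bitwise operators tested elsewhere
   in this patch. */
fixed_int32_t lane_min(fixed_int32_t a, fixed_int32_t b) {
  fixed_int32_t m = a < b;      /* lane i: -1 if a[i] < b[i], else 0 */
  return b ^ ((a ^ b) & m);     /* selects a[i] where m is -1, b[i] where 0 */
}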
diff --git a/clang/test/CodeGen/aarch64-sve-vls-shift-ops.c b/clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
new file mode 100644
index 0000000000000..b431649cc7395
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
@@ -0,0 +1,650 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \
+// RUN: -fallow-half-arguments-and-returns -disable-O0-optnone -mvscale-min=4 -mvscale-max=4 \
+// RUN:  -emit-llvm -o - %s | opt -S -sroa | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#define N 512
+
+typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svuint8_t fixed_uint8_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint32_t fixed_uint32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint64_t fixed_uint64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svfloat16_t fixed_float16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svfloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
+
+// CHECK-LABEL: @lshift_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t lshift_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @rshift_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHR:%.*]] = ashr <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t rshift_i8(fixed_int8_t a, fixed_int8_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @lshift_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t lshift_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @rshift_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHR:%.*]] = lshr <64 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t rshift_u8(fixed_uint8_t a, fixed_uint8_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @lshift_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t lshift_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @rshift_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHR:%.*]] = ashr <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t rshift_i16(fixed_int16_t a, fixed_int16_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @lshift_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t lshift_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @rshift_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHR:%.*]] = lshr <32 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t rshift_u16(fixed_uint16_t a, fixed_uint16_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @lshift_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t lshift_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @rshift_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHR:%.*]] = ashr <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t rshift_i32(fixed_int32_t a, fixed_int32_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @lshift_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t lshift_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @rshift_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHR:%.*]] = lshr <16 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t rshift_u32(fixed_uint32_t a, fixed_uint32_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @lshift_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t lshift_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @rshift_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHR:%.*]] = ashr <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t rshift_i64(fixed_int64_t a, fixed_int64_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @lshift_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t lshift_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @rshift_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SHR:%.*]] = lshr <8 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t rshift_u64(fixed_uint64_t a, fixed_uint64_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @lshift_i8_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[B:%.*]] to i32
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i32> [[SPLAT_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <64 x i32> [[SPLAT_SPLAT]] to <64 x i8>
+// CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[A]], [[SH_PROM]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t lshift_i8_rsplat(fixed_int8_t a, int8_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @lshift_i8_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t lshift_i8_lsplat(fixed_int8_t a, int8_t b) {
+  return b << a;
+}
+
+// CHECK-LABEL: @rshift_i8_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[B:%.*]] to i32
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i32> [[SPLAT_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <64 x i32> [[SPLAT_SPLAT]] to <64 x i8>
+// CHECK-NEXT:    [[SHR:%.*]] = ashr <64 x i8> [[A]], [[SH_PROM]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t rshift_i8_rsplat(fixed_int8_t a, int8_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @rshift_i8_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR:%.*]] = ashr <64 x i8> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8_t rshift_i8_lsplat(fixed_int8_t a, int8_t b) {
+  return b >> a;
+}
+
+// CHECK-LABEL: @lshift_u8_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[B:%.*]] to i32
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i32> [[SPLAT_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <64 x i32> [[SPLAT_SPLAT]] to <64 x i8>
+// CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[A]], [[SH_PROM]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t lshift_u8_rsplat(fixed_uint8_t a, uint8_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @lshift_u8_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t lshift_u8_lsplat(fixed_uint8_t a, uint8_t b) {
+  return b << a;
+}
+
+// CHECK-LABEL: @rshift_u8_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[B:%.*]] to i32
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i32> [[SPLAT_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <64 x i32> [[SPLAT_SPLAT]] to <64 x i8>
+// CHECK-NEXT:    [[SHR:%.*]] = lshr <64 x i8> [[A]], [[SH_PROM]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t rshift_u8_rsplat(fixed_uint8_t a, uint8_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @rshift_u8_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.experimental.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR:%.*]] = lshr <64 x i8> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8_t rshift_u8_lsplat(fixed_uint8_t a, uint8_t b) {
+  return b >> a;
+}
+
+// CHECK-LABEL: @lshift_i16_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[B:%.*]] to i32
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i16>
+// CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[A]], [[SH_PROM]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t lshift_i16_rsplat(fixed_int16_t a, int16_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @lshift_i16_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t lshift_i16_lsplat(fixed_int16_t a, int16_t b) {
+  return b << a;
+}
+
+// CHECK-LABEL: @rshift_i16_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[B:%.*]] to i32
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i16>
+// CHECK-NEXT:    [[SHR:%.*]] = ashr <32 x i16> [[A]], [[SH_PROM]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t rshift_i16_rsplat(fixed_int16_t a, int16_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @rshift_i16_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR:%.*]] = ashr <32 x i16> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16_t rshift_i16_lsplat(fixed_int16_t a, int16_t b) {
+  return b >> a;
+}
+
+// CHECK-LABEL: @lshift_u16_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[B:%.*]] to i32
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i16>
+// CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[A]], [[SH_PROM]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t lshift_u16_rsplat(fixed_uint16_t a, uint16_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @lshift_u16_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t lshift_u16_lsplat(fixed_uint16_t a, uint16_t b) {
+  return b << a;
+}
+
+// CHECK-LABEL: @rshift_u16_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[B:%.*]] to i32
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i16>
+// CHECK-NEXT:    [[SHR:%.*]] = lshr <32 x i16> [[A]], [[SH_PROM]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t rshift_u16_rsplat(fixed_uint16_t a, uint16_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @rshift_u16_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR:%.*]] = lshr <32 x i16> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16_t rshift_u16_lsplat(fixed_uint16_t a, uint16_t b) {
+  return b >> a;
+}
+
+// CHECK-LABEL: @lshift_i32_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t lshift_i32_rsplat(fixed_int32_t a, int32_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @lshift_i32_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t lshift_i32_lsplat(fixed_int32_t a, int32_t b) {
+  return b << a;
+}
+
+// CHECK-LABEL: @rshift_i32_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR:%.*]] = ashr <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t rshift_i32_rsplat(fixed_int32_t a, int32_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @rshift_i32_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR:%.*]] = ashr <16 x i32> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32_t rshift_i32_lsplat(fixed_int32_t a, int32_t b) {
+  return b >> a;
+}
+
+// CHECK-LABEL: @lshift_u32_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t lshift_u32_rsplat(fixed_uint32_t a, uint32_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @lshift_u32_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t lshift_u32_lsplat(fixed_uint32_t a, uint32_t b) {
+  return b << a;
+}
+
+// CHECK-LABEL: @rshift_u32_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR:%.*]] = lshr <16 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t rshift_u32_rsplat(fixed_uint32_t a, uint32_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @rshift_u32_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR:%.*]] = lshr <16 x i32> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32_t rshift_u32_lsplat(fixed_uint32_t a, uint32_t b) {
+  return b >> a;
+}
+
+// CHECK-LABEL: @lshift_i64_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t lshift_i64_rsplat(fixed_int64_t a, int64_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @lshift_i64_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t lshift_i64_lsplat(fixed_int64_t a, int64_t b) {
+  return b << a;
+}
+
+// CHECK-LABEL: @rshift_i64_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR:%.*]] = ashr <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t rshift_i64_rsplat(fixed_int64_t a, int64_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @rshift_i64_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR:%.*]] = ashr <8 x i64> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64_t rshift_i64_lsplat(fixed_int64_t a, int64_t b) {
+  return b >> a;
+}
+
+// CHECK-LABEL: @lshift_u64_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t lshift_u64_rsplat(fixed_uint64_t a, uint64_t b) {
+  return a << b;
+}
+
+// CHECK-LABEL: @lshift_u64_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t lshift_u64_lsplat(fixed_uint64_t a, uint64_t b) {
+  return b << a;
+}
+
+// CHECK-LABEL: @rshift_u64_rsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR:%.*]] = lshr <8 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t rshift_u64_rsplat(fixed_uint64_t a, uint64_t b) {
+  return a >> b;
+}
+
+// CHECK-LABEL: @rshift_u64_lsplat(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SHR:%.*]] = lshr <8 x i64> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64_t rshift_u64_lsplat(fixed_uint64_t a, uint64_t b) {
+  return b >> a;
+}

diff --git a/clang/test/CodeGen/aarch64-sve-vls-subscript-ops.c b/clang/test/CodeGen/aarch64-sve-vls-subscript-ops.c
new file mode 100644
index 0000000000000..444fab942d782
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-vls-subscript-ops.c
@@ -0,0 +1,117 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \
+// RUN: -fallow-half-arguments-and-returns -disable-O0-optnone -mvscale-min=4 -mvscale-max=4 \
+// RUN:  -emit-llvm -o - %s | opt -S -sroa | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+#include <stddef.h>
+
+#define N 512
+
+typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svuint8_t fixed_uint8_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint32_t fixed_uint32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svuint64_t fixed_uint64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svfloat16_t fixed_float16_t __attribute__((arm_sve_vector_bits(N)));
+typedef svfloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N)));
+typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
+
+// CHECK-LABEL: @subscript_int16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <32 x i16> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT:    ret i16 [[VECEXT]]
+//
+int16_t subscript_int16(fixed_int16_t a, size_t b) {
+  return a[b];
+}
+
+// CHECK-LABEL: @subscript_uint16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.experimental.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <32 x i16> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT:    ret i16 [[VECEXT]]
+//
+uint16_t subscript_uint16(fixed_uint16_t a, size_t b) {
+  return a[b];
+}
+
+// CHECK-LABEL: @subscript_int32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <16 x i32> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT:    ret i32 [[VECEXT]]
+//
+int32_t subscript_int32(fixed_int32_t a, size_t b) {
+  return a[b];
+}
+
+// CHECK-LABEL: @subscript_uint32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <16 x i32> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT:    ret i32 [[VECEXT]]
+//
+uint32_t subscript_uint32(fixed_uint32_t a, size_t b) {
+  return a[b];
+}
+
+// CHECK-LABEL: @subscript_int64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <8 x i64> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT:    ret i64 [[VECEXT]]
+//
+int64_t subscript_int64(fixed_int64_t a, size_t b) {
+  return a[b];
+}
+
+// CHECK-LABEL: @subscript_uint64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <8 x i64> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT:    ret i64 [[VECEXT]]
+//
+uint64_t subscript_uint64(fixed_uint64_t a, size_t b) {
+  return a[b];
+}
+
+// CHECK-LABEL: @subscript_float16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.experimental.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <32 x half> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT:    ret half [[VECEXT]]
+//
+__fp16 subscript_float16(fixed_float16_t a, size_t b) {
+  return a[b];
+}
+
+// CHECK-LABEL: @subscript_float32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <16 x float> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT:    ret float [[VECEXT]]
+//
+float subscript_float32(fixed_float32_t a, size_t b) {
+  return a[b];
+}
+
+// CHECK-LABEL: @subscript_float64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <8 x double> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT:    ret double [[VECEXT]]
+//
+double subscript_float64(fixed_float64_t a, size_t b) {
+  return a[b];
+}

More information about the cfe-commits mailing list