[clang] e30efa0 - [NFC] Fix intel intrinsics test for PowerPC

Qiu Chaofan via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 16 00:20:17 PST 2022


Author: Qiu Chaofan
Date: 2022-02-16T16:16:30+08:00
New Revision: e30efa030465cfcfeb8e7ca4407c87176248ca57

URL: https://github.com/llvm/llvm-project/commit/e30efa030465cfcfeb8e7ca4407c87176248ca57
DIFF: https://github.com/llvm/llvm-project/commit/e30efa030465cfcfeb8e7ca4407c87176248ca57.diff

LOG: [NFC] Fix intel intrinsics test for PowerPC

These tests are dumped without optimization, which makes them too
lengthy and contain meaningless load/stores. Clean them up to prepare
for future headers update.

Added: 
    

Modified: 
    clang/test/CodeGen/PowerPC/ppc-emmintrin.c
    clang/test/CodeGen/PowerPC/ppc-mmintrin.c
    clang/test/CodeGen/PowerPC/ppc-smmintrin.c
    clang/test/CodeGen/PowerPC/ppc-tmmintrin.c
    clang/test/CodeGen/PowerPC/ppc-xmmintrin.c

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c
index 3342c180f640d..6e4a2e4309d4a 100644
--- a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: powerpc-registered-target
 
 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
@@ -47,120 +46,38 @@ test_add() {
 
 // CHECK-LABEL: @test_add
 
-// CHECK: define available_externally <2 x i64> @_mm_add_epi64(<2 x i64> noundef [[REG1:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG2:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1]], <2 x i64>* [[REG3:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG2]], <2 x i64>* [[REG4:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG5:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG3]], align 16
-// CHECK-NEXT: [[REG6:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG4]], align 16
-// CHECK-NEXT: [[REG7:[0-9a-zA-Z_%.]+]] = add <2 x i64> [[REG5]], [[REG6]]
-// CHECK-NEXT: ret <2 x i64> [[REG7]]
-
-// CHECK: define available_externally <2 x i64> @_mm_add_epi32(<2 x i64> noundef [[REG8:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG9:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG8]], <2 x i64>* [[REG10:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG9]], <2 x i64>* [[REG11:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG12:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG10]], align 16
-// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG12]] to <4 x i32>
-// CHECK-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG11]], align 16
-// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG14]] to <4 x i32>
-// CHECK-NEXT: [[REG16:[0-9a-zA-Z_%.]+]] = add <4 x i32> [[REG13]], [[REG15]]
-// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG16]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG17]]
-
-// CHECK: define available_externally <2 x i64> @_mm_add_epi16(<2 x i64> noundef [[REG18:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG19:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG18]], <2 x i64>* [[REG20:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG19]], <2 x i64>* [[REG21:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG20]], align 16
-// CHECK-NEXT: [[REG23:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG22]] to <8 x i16>
-// CHECK-NEXT: [[REG24:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG21]], align 16
-// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG24]] to <8 x i16>
-// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = add <8 x i16> [[REG23]], [[REG25]]
-// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG26]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG27]]
-
-// CHECK: define available_externally <2 x i64> @_mm_add_epi8(<2 x i64> noundef [[REG28:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG29:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG28]], <2 x i64>* [[REG30:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG29]], <2 x i64>* [[REG31:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG32:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG30]], align 16
-// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG32]] to <16 x i8>
-// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG31]], align 16
-// CHECK-NEXT: [[REG35:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG34]] to <16 x i8>
-// CHECK-NEXT: [[REG36:[0-9a-zA-Z_%.]+]] = add <16 x i8> [[REG33]], [[REG35]]
-// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG36]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG37]]
-
-// CHECK: define available_externally <2 x double> @_mm_add_pd(<2 x double> noundef [[REG38:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG39:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG38]], <2 x double>* [[REG40:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG39]], <2 x double>* [[REG41:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG42:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG40]], align 16
-// CHECK-NEXT: [[REG43:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG41]], align 16
-// CHECK-NEXT: [[REG44:[0-9a-zA-Z_%.]+]] = fadd <2 x double> [[REG42]], [[REG43]]
-// CHECK-NEXT: ret <2 x double> [[REG44]]
-
-// CHECK: define available_externally <2 x double> @_mm_add_sd(<2 x double> noundef [[REG45:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG46:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG45]], <2 x double>* [[REG47:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG46]], <2 x double>* [[REG48:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG49:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG47]], align 16
-// CHECK-NEXT: [[REG50:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG49]], i32 0
-// CHECK-NEXT: [[REG51:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG48]], align 16
-// CHECK-NEXT: [[REG52:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG51]], i32 0
-// CHECK-NEXT: [[REG53:[0-9a-zA-Z_%.]+]] = fadd double [[REG50]], [[REG52]]
-// CHECK-NEXT: [[REG54:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG47]], align 16
-// CHECK-NEXT: [[REG55:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG54]], double [[REG53]], i32 0
-// CHECK-NEXT: store <2 x double> [[REG55]], <2 x double>* [[REG47]], align 16
-// CHECK-NEXT: [[REG56:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG47]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG56]]
-
-// CHECK: define available_externally i64 @_mm_add_si64(i64 noundef [[REG57:[0-9a-zA-Z_%.]+]], i64 noundef [[REG58:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG57]], i64* [[REG59:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG58]], i64* [[REG60:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG61:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG59]], align 8
-// CHECK-NEXT: [[REG62:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG60]], align 8
-// CHECK-NEXT: [[REG63:[0-9a-zA-Z_%.]+]] = add i64 [[REG61]], [[REG62]]
-// CHECK-NEXT: ret i64 [[REG63]]
-
-// CHECK: define available_externally <2 x i64> @_mm_adds_epi16(<2 x i64> noundef [[REG64:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG65:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG64]], <2 x i64>* [[REG66:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG65]], <2 x i64>* [[REG67:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG68:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG66]], align 16
-// CHECK-NEXT: [[REG69:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG68]] to <8 x i16>
-// CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG67]], align 16
-// CHECK-NEXT: [[REG71:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG70]] to <8 x i16>
-// CHECK-NEXT: [[REG72:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_adds(short vector[8], short vector[8])(<8 x i16> noundef [[REG69]], <8 x i16> noundef [[REG71]])
-// CHECK-NEXT: [[REG73:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG72]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG73]]
-
-// CHECK: define available_externally <2 x i64> @_mm_adds_epi8(<2 x i64> noundef [[REG74:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG75:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG74]], <2 x i64>* [[REG76:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG75]], <2 x i64>* [[REG77:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG78:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG76]], align 16
-// CHECK-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG78]] to <16 x i8>
-// CHECK-NEXT: [[REG80:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG77]], align 16
-// CHECK-NEXT: [[REG81:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG80]] to <16 x i8>
-// CHECK-NEXT: [[REG82:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_adds(signed char vector[16], signed char vector[16])(<16 x i8> noundef [[REG79]], <16 x i8> noundef [[REG81]])
-// CHECK-NEXT: [[REG83:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG82]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG83]]
-
-// CHECK: define available_externally <2 x i64> @_mm_adds_epu16(<2 x i64> noundef [[REG84:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG85:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG84]], <2 x i64>* [[REG86:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG85]], <2 x i64>* [[REG87:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG88:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG86]], align 16
-// CHECK-NEXT: [[REG89:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG88]] to <8 x i16>
-// CHECK-NEXT: [[REG90:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG87]], align 16
-// CHECK-NEXT: [[REG91:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG90]] to <8 x i16>
-// CHECK-NEXT: [[REG92:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_adds(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG89]], <8 x i16> noundef [[REG91]])
-// CHECK-NEXT: [[REG93:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG92]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG93]]
-
-// CHECK: define available_externally <2 x i64> @_mm_adds_epu8(<2 x i64> noundef [[REG94:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG95:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG94]], <2 x i64>* [[REG96:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG95]], <2 x i64>* [[REG97:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG98:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG96]], align 16
-// CHECK-NEXT: [[REG99:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG98]] to <16 x i8>
-// CHECK-NEXT: [[REG100:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG97]], align 16
-// CHECK-NEXT: [[REG101:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG100]] to <16 x i8>
-// CHECK-NEXT: [[REG102:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_adds(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG99]], <16 x i8> noundef [[REG101]])
-// CHECK-NEXT: [[REG103:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG102]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG103]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi64
+// CHECK: add <2 x i64>
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi32
+// CHECK: add <4 x i32>
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi16
+// CHECK: add <8 x i16>
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi8
+// CHECK: add <16 x i8>
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_add_pd
+// CHECK: fadd <2 x double>
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_add_sd
+// CHECK: fadd double
+
+// CHECK-LABEL: define available_externally i64 @_mm_add_si64
+// CHECK: add i64
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epi16
+// CHECK: call <8 x i16> @vec_adds(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epi8
+// CHECK: call <16 x i8> @vec_adds(signed char vector[16], signed char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epu16
+// CHECK: call <8 x i16> @vec_adds(unsigned short vector[8], unsigned short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epu8
+// CHECK: call <16 x i8> @vec_adds(unsigned char vector[16], unsigned char vector[16])
 
 void __attribute__((noinline))
 test_avg() {
@@ -170,27 +87,11 @@ test_avg() {
 
 // CHECK-LABEL: @test_avg
 
-// CHECK: define available_externally <2 x i64> @_mm_avg_epu16(<2 x i64> noundef [[REG104:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG105:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG104]], <2 x i64>* [[REG106:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG105]], <2 x i64>* [[REG107:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG108:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG106]], align 16
-// CHECK-NEXT: [[REG109:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG108]] to <8 x i16>
-// CHECK-NEXT: [[REG110:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG107]], align 16
-// CHECK-NEXT: [[REG111:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG110]] to <8 x i16>
-// CHECK-NEXT: [[REG112:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG109]], <8 x i16> noundef [[REG111]])
-// CHECK-NEXT: [[REG113:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG112]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG113]]
-
-// CHECK: define available_externally <2 x i64> @_mm_avg_epu8(<2 x i64> noundef [[REG114:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG115:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG114]], <2 x i64>* [[REG116:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG115]], <2 x i64>* [[REG117:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG118:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG116]], align 16
-// CHECK-NEXT: [[REG119:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG118]] to <16 x i8>
-// CHECK-NEXT: [[REG120:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG117]], align 16
-// CHECK-NEXT: [[REG121:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG120]] to <16 x i8>
-// CHECK-NEXT: [[REG122:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG119]], <16 x i8> noundef [[REG121]])
-// CHECK-NEXT: [[REG123:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG122]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG123]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_avg_epu16
+// CHECK: call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_avg_epu8
+// CHECK: call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])
 
 void __attribute__((noinline))
 test_bs() {
@@ -200,88 +101,28 @@ test_bs() {
 
 // CHECK-LABEL: @test_bs
 
-// CHECK: define available_externally <2 x i64> @_mm_bslli_si128(<2 x i64> noundef [[REG124:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG125:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG124]], <2 x i64>* [[REG126:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG125]], i32* [[REG127:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[REG128:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG129:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG127]], align 4
-// CHECK-NEXT: [[REG130:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG129]], 16
-// CHECK-NEXT: br i1 [[REG130]], label %[[REG131:[0-9a-zA-Z_%.]+]], label %[[REG132:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG131]]:
-// CHECK-NEXT: [[REG133:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG126]], align 16
-// CHECK-NEXT: [[REG134:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG133]] to <16 x i8>
-// CHECK-NEXT: [[REG135:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG127]], align 4
-// CHECK-NEXT: [[REG136:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef [[REG134]], <16 x i8> noundef zeroinitializer, i32 noundef zeroext [[REG135]])
-// CHECK-NEXT: store <16 x i8> [[REG136]], <16 x i8>* [[REG137:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: br label %[[REG138:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG132]]:
-// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[REG137]], align 16
-// CHECK-NEXT: br label %[[REG138:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG138]]:
-// CHECK-NEXT: [[REG139:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG137]], align 16
-// CHECK-NEXT: [[REG140:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG139]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG140]]
-
-// CHECK: define available_externally <2 x i64> @_mm_bsrli_si128(<2 x i64> noundef [[REG141:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG142:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG141]], <2 x i64>* [[REG143:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG142]], i32* [[REG144:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[REG145:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG146:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG144]], align 4
-// CHECK-NEXT: [[REG147:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG146]], 16
-// CHECK-NEXT: br i1 [[REG147]], label %[[REG148:[0-9a-zA-Z_%.]+]], label %[[REG149:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG148]]:
-
-// CHECK-LE-NEXT: load
-// CHECK-LE-NEXT: call i1 @llvm.is.constant
-// CHECK-LE-NEXT: br i1 %[[REG1896a:[0-9a-zA-Z_%.]+]], label %[[REG150:[0-9a-zA-Z_%.]+]], label %[[REG151:[0-9a-zA-Z_%.]+]]
-// CHECK-LE: [[REG150]]:
-// CHECK-LE: [[REG152:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG143]], align 16
-// CHECK-LE-NEXT: [[REG153:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG152]] to <16 x i8>
-// CHECK-LE-NEXT: [[REG154:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG144]], align 4
-// CHECK-LE-NEXT: [[REG155:[0-9a-zA-Z_%.]+]] = sub nsw i32 16, [[REG154]]
-// CHECK-LE-NEXT: [[REG156:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef zeroinitializer, <16 x i8> noundef [[REG153]], i32 noundef zeroext [[REG155]])
-// CHECK-LE-NEXT: store <16 x i8> [[REG156]], <16 x i8>* [[REG157:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-LE-NEXT: br label %[[REG158:[0-9a-zA-Z_%.]+]]
-// CHECK-LE: [[REG151]]:
-// CHECK-LE: [[REG159:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG144]], align 4
-// CHECK-LE-NEXT: [[REG160:[0-9a-zA-Z_%.]+]] = mul nsw i32 [[REG159]], 8
-// CHECK-LE-NEXT: [[REG161:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG160]] to i8
-// CHECK-LE-NEXT: [[REG162:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext [[REG161]])
-// CHECK-LE-NEXT: store <16 x i8> [[REG162]], <16 x i8>* [[REG163:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-LE-NEXT: [[REG164:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG143]], align 16
-// CHECK-LE-NEXT: [[REG165:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG164]] to <16 x i8>
-// CHECK-LE-NEXT: [[REG166:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG163]], align 16
-// CHECK-LE-NEXT: [[REG167:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG165]], <16 x i8> noundef [[REG166]])
-// CHECK-LE-NEXT: store <16 x i8> [[REG167]], <16 x i8>* [[REG157]], align 16
-// CHECK-LE-NEXT: br label %[[REG158:[0-9a-zA-Z_%.]+]]
-// CHECK-LE: [[REG158]]:
-// CHECK-LE: br label %[[REG168:[0-9a-zA-Z_%.]+]]
-// CHECK-LE: [[REG149]]:
-// CHECK-LE: store <16 x i8> zeroinitializer, <16 x i8>* [[REG157]], align 16
-// CHECK-LE-NEXT: br label %[[REG168:[0-9a-zA-Z_%.]+]]
-// CHECK-LE: [[REG168]]:
-// CHECK-LE: [[REG169:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG157]], align 16
-// CHECK-LE-NEXT: [[REG170:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG169]] to <2 x i64>
-// CHECK-LE-NEXT: ret <2 x i64> [[REG170]]
-
-// CHECK-BE: [[REG171:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG144]], align 4
-// CHECK-BE-NEXT: [[REG172:[0-9a-zA-Z_%.]+]] = mul nsw i32 [[REG171]], 8
-// CHECK-BE-NEXT: [[REG173:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG172]] to i8
-// CHECK-BE-NEXT: [[REG174:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext [[REG173]])
-// CHECK-BE-NEXT: store <16 x i8> [[REG174]], <16 x i8>* [[REG175:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-BE-NEXT: [[REG176:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG143]], align 16
-// CHECK-BE-NEXT: [[REG177:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG176]] to <16 x i8>
-// CHECK-BE-NEXT: [[REG178:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG175]], align 16
-// CHECK-BE-NEXT: [[REG179:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG177]], <16 x i8> noundef [[REG178]])
-// CHECK-BE-NEXT: store <16 x i8> [[REG179]], <16 x i8>* [[REG180:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-BE-NEXT: br label %[[REG181:[0-9a-zA-Z_%.]+]]
-// CHECK-BE: [[REG149]]:
-// CHECK-BE: store <16 x i8> zeroinitializer, <16 x i8>* [[REG180]], align 16
-// CHECK-BE-NEXT: br label %[[REG181:[0-9a-zA-Z_%.]+]]
-// CHECK-BE: [[REG181]]:
-// CHECK-BE: [[REG182:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG180]], align 16
-// CHECK-BE-NEXT: [[REG183:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG182]] to <2 x i64>
-// CHECK-BE-NEXT: ret <2 x i64> [[REG183]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_bslli_si128
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
+// CHECK: br i1 %[[CMP]]
+// CHECK: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer, i32 noundef zeroext %{{[0-9a-zA-Z_.]+}})
+// CHECK: store <16 x i8> zeroinitializer, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_bsrli_si128
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
+// CHECK: br i1 %[[CMP]]
+// CHECK-LE: call i1 @llvm.is.constant
+// CHECK-LE: %[[SUB:[0-9a-zA-Z_.]+]] = sub nsw i32 16, %{{[0-9a-zA-Z_.]+}}
+// CHECK-LE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext %[[SUB]])
+// CHECK-LE: %[[MUL:[0-9a-zA-Z_.]+]] = mul nsw i32 %{{[0-9a-zA-Z_.]+}}, 8
+// CHECK-LE: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[MUL]] to i8
+// CHECK-LE: call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext %[[TRUNC]])
+// CHECK-LE: call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])
+// CHECK-LE: store <16 x i8> zeroinitializer, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK-BE: %[[MUL:[0-9a-zA-Z_.]+]] = mul nsw i32 %{{[0-9a-zA-Z_.]+}}, 8
+// CHECK-BE: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[MUL]] to i8
+// CHECK-BE: call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext %[[TRUNC]])
+// CHECK-BE: call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])
+// CHECK-BE: store <16 x i8> zeroinitializer, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
 
 void __attribute__((noinline))
 test_cast() {
@@ -295,41 +136,23 @@ test_cast() {
 
 // CHECK-LABEL: @test_cast
 
-// CHECK: define available_externally <4 x float> @_mm_castpd_ps(<2 x double> noundef [[REG184:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG184]], <2 x double>* [[REG185:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG186:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG185]], align 16
-// CHECK-NEXT: [[REG187:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG186]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG187]]
-
-// CHECK: define available_externally <2 x i64> @_mm_castpd_si128(<2 x double> noundef [[REG188:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG188]], <2 x double>* [[REG189:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG190:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG189]], align 16
-// CHECK-NEXT: [[REG191:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG190]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG191]]
-
-// CHECK: define available_externally <2 x double> @_mm_castps_pd(<4 x float> noundef [[REG192:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG192]], <4 x float>* [[REG193:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG194:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG193]], align 16
-// CHECK-NEXT: [[REG195:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG194]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG195]]
-
-// CHECK: define available_externally <2 x i64> @_mm_castps_si128(<4 x float> noundef [[REG196:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG196]], <4 x float>* [[REG197:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG198:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG197]], align 16
-// CHECK-NEXT: [[REG199:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG198]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG199]]
-
-// CHECK: define available_externally <2 x double> @_mm_castsi128_pd(<2 x i64> noundef [[REG200:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG200]], <2 x i64>* [[REG201:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG202:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG201]], align 16
-// CHECK-NEXT: [[REG203:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG202]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG203]]
-
-// CHECK: define available_externally <4 x float> @_mm_castsi128_ps(<2 x i64> noundef [[REG204:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG204]], <2 x i64>* [[REG205:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG206:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG205]], align 16
-// CHECK-NEXT: [[REG207:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG206]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG207]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_castpd_ps
+// CHECK: bitcast <2 x double> %{{[0-9a-zA-Z_.]+}} to <4 x float>
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_castpd_si128
+// CHECK: bitcast <2 x double> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_castps_pd
+// CHECK: bitcast <4 x float> %{{[0-9a-zA-Z_.]+}} to <2 x double>
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_castps_si128
+// CHECK: bitcast <4 x float> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_castsi128_pd
+// CHECK: bitcast <2 x i64> %{{[0-9a-zA-Z_.]+}} to <2 x double>
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_castsi128_ps
+// CHECK: bitcast <2 x i64> %{{[0-9a-zA-Z_.]+}} to <4 x float>
 
 void __attribute__((noinline))
 test_cmp() {
@@ -370,420 +193,147 @@ test_cmp() {
 
 // CHECK-LABEL: @test_cmp
 
-// CHECK: define available_externally <2 x i64> @_mm_cmpeq_epi32
-// CHECK: [[REG208:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpeq(int vector[4], int vector[4])
-// CHECK-NEXT: [[REG209:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG208]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG209]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cmpeq_epi16
-// CHECK: [[REG210:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmpeq(short vector[8], short vector[8])(<8 x i16> noundef {{[0-9a-zA-Z_%.]+}}, <8 x i16> noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG211:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG210]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG211]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cmpeq_epi8
-// CHECK: [[REG212:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmpeq(signed char vector[16], signed char vector[16])
-// CHECK-NEXT: [[REG213:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG212]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG213]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cmpgt_epi32
-// CHECK: [[REG214:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(int vector[4], int vector[4])
-// CHECK-NEXT: [[REG215:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG214]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG215]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cmpgt_epi16
-// CHECK: [[REG216:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])
-// CHECK-NEXT: [[REG217:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG216]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG217]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cmpgt_epi8
-// CHECK: [[REG218:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16])
-// CHECK-NEXT: [[REG219:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG218]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG219]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cmplt_epi32
-// CHECK: [[REG220:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(int vector[4], int vector[4])
-// CHECK-NEXT: [[REG221:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG220]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG221]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cmplt_epi16
-// CHECK: [[REG222:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmplt(short vector[8], short vector[8])
-// CHECK-NEXT: [[REG223:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG222]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG223]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cmplt_epi8
-// CHECK: [[REG224:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])
-// CHECK-NEXT: [[REG225:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG224]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG225]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpeq_pd
-// CHECK: [[REG226:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
-// CHECK-NEXT: [[REG227:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG226]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG227]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpeq_sd(<2 x double> noundef [[REG228:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG229:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG228]], <2 x double>* [[REG230:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG229]], <2 x double>* [[REG231:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG232:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG230]], align 16
-// CHECK-NEXT: [[REG233:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG232]], i32 0
-// CHECK-NEXT: [[REG234:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG233]])
-// CHECK-NEXT: store <2 x double> [[REG234]], <2 x double>* [[REG235:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG236:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG231]], align 16
-// CHECK-NEXT: [[REG237:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG236]], i32 0
-// CHECK-NEXT: [[REG238:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG237]])
-// CHECK-NEXT: store <2 x double> [[REG238]], <2 x double>* [[REG239:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG240:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG235]], align 16
-// CHECK-NEXT: [[REG241:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG239]], align 16
-// CHECK-NEXT: [[REG242:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])(<2 x double> noundef [[REG240]], <2 x double> noundef [[REG241]])
-// CHECK-NEXT: [[REG243:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG242]] to <2 x double>
-// CHECK-NEXT: store <2 x double> [[REG243]], <2 x double>* [[REG244:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG245:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG244]], align 16
-// CHECK-NEXT: [[REG246:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG245]], i32 0
-// CHECK-NEXT: [[REG247:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG230]], align 16
-// CHECK-NEXT: [[REG248:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG247]], i32 1
-// CHECK-NEXT: [[REG249:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG246]], double noundef [[REG248]])
-// CHECK-NEXT: ret <2 x double> [[REG249]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpge_pd
-// CHECK: [[REG250:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
-// CHECK-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG250]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG251]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpge_sd(<2 x double> noundef [[REG252:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG253:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG252]], <2 x double>* [[REG254:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG253]], <2 x double>* [[REG255:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG256:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG254]], align 16
-// CHECK-NEXT: [[REG257:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG256]], i32 0
-// CHECK-NEXT: [[REG258:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG257]])
-// CHECK-NEXT: store <2 x double> [[REG258]], <2 x double>* [[REG259:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG260:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG255]], align 16
-// CHECK-NEXT: [[REG261:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG260]], i32 0
-// CHECK-NEXT: [[REG262:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG261]])
-// CHECK-NEXT: store <2 x double> [[REG262]], <2 x double>* [[REG263:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG264:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG259]], align 16
-// CHECK-NEXT: [[REG265:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG263]], align 16
-// CHECK-NEXT: [[REG266:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpge(double vector[2], double vector[2])(<2 x double> noundef [[REG264]], <2 x double> noundef [[REG265]])
-// CHECK-NEXT: [[REG267:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG266]] to <2 x double>
-// CHECK-NEXT: store <2 x double> [[REG267]], <2 x double>* [[REG268:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG269:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG268]], align 16
-// CHECK-NEXT: [[REG270:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG269]], i32 0
-// CHECK-NEXT: [[REG271:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG254]], align 16
-// CHECK-NEXT: [[REG272:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG271]], i32 1
-// CHECK-NEXT: [[REG273:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG270]], double noundef [[REG272]])
-// CHECK-NEXT: ret <2 x double> [[REG273]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpgt_pd
-// CHECK: [[REG274:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpgt(double vector[2], double vector[2])
-// CHECK-NEXT: [[REG275:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG274]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG275]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpgt_sd(<2 x double> noundef [[REG276:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG277:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG276]], <2 x double>* [[REG278:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG277]], <2 x double>* [[REG279:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG280:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG278]], align 16
-// CHECK-NEXT: [[REG281:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG280]], i32 0
-// CHECK-NEXT: [[REG282:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG281]])
-// CHECK-NEXT: store <2 x double> [[REG282]], <2 x double>* [[REG283:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG284:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG279]], align 16
-// CHECK-NEXT: [[REG285:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG284]], i32 0
-// CHECK-NEXT: [[REG286:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG285]])
-// CHECK-NEXT: store <2 x double> [[REG286]], <2 x double>* [[REG287:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG288:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG283]], align 16
-// CHECK-NEXT: [[REG289:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG287]], align 16
-// CHECK-NEXT: [[REG290:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpgt(double vector[2], double vector[2])(<2 x double> noundef [[REG288]], <2 x double> noundef [[REG289]])
-// CHECK-NEXT: [[REG291:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG290]] to <2 x double>
-// CHECK-NEXT: store <2 x double> [[REG291]], <2 x double>* [[REG292:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG293:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG292]], align 16
-// CHECK-NEXT: [[REG294:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG293]], i32 0
-// CHECK-NEXT: [[REG295:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG278]], align 16
-// CHECK-NEXT: [[REG296:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG295]], i32 1
-// CHECK-NEXT: [[REG297:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG294]], double noundef [[REG296]])
-// CHECK-NEXT: ret <2 x double> [[REG297]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmple_pd
-// CHECK: [[REG298:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmple(double vector[2], double vector[2])
-// CHECK-NEXT: [[REG299:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG298]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG299]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmple_sd(<2 x double> noundef [[REG300:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG301:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG300]], <2 x double>* [[REG302:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG301]], <2 x double>* [[REG303:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG304:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG302]], align 16
-// CHECK-NEXT: [[REG305:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG304]], i32 0
-// CHECK-NEXT: [[REG306:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG305]])
-// CHECK-NEXT: store <2 x double> [[REG306]], <2 x double>* [[REG307:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG308:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG303]], align 16
-// CHECK-NEXT: [[REG309:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG308]], i32 0
-// CHECK-NEXT: [[REG310:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG309]])
-// CHECK-NEXT: store <2 x double> [[REG310]], <2 x double>* [[REG311:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG312:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG307]], align 16
-// CHECK-NEXT: [[REG313:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG311]], align 16
-// CHECK-NEXT: [[REG314:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmple(double vector[2], double vector[2])(<2 x double> noundef [[REG312]], <2 x double> noundef [[REG313]])
-// CHECK-NEXT: [[REG315:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG314]] to <2 x double>
-// CHECK-NEXT: store <2 x double> [[REG315]], <2 x double>* [[REG316:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG317:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG316]], align 16
-// CHECK-NEXT: [[REG318:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG317]], i32 0
-// CHECK-NEXT: [[REG319:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG302]], align 16
-// CHECK-NEXT: [[REG320:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG319]], i32 1
-// CHECK-NEXT: [[REG321:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG318]], double noundef [[REG320]])
-// CHECK-NEXT: ret <2 x double> [[REG321]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmplt_pd
-// CHECK: [[REG322:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
-// CHECK-NEXT: [[REG323:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG322]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG323]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmplt_sd(<2 x double> noundef [[REG324:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG325:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG324]], <2 x double>* [[REG326:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG325]], <2 x double>* [[REG327:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG328:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG326]], align 16
-// CHECK-NEXT: [[REG329:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG328]], i32 0
-// CHECK-NEXT: [[REG330:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG329]])
-// CHECK-NEXT: store <2 x double> [[REG330]], <2 x double>* [[REG331:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG332:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG327]], align 16
-// CHECK-NEXT: [[REG333:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG332]], i32 0
-// CHECK-NEXT: [[REG334:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG333]])
-// CHECK-NEXT: store <2 x double> [[REG334]], <2 x double>* [[REG335:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG336:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG331]], align 16
-// CHECK-NEXT: [[REG337:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG335]], align 16
-// CHECK-NEXT: [[REG338:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmplt(double vector[2], double vector[2])(<2 x double> noundef [[REG336]], <2 x double> noundef [[REG337]])
-// CHECK-NEXT: [[REG339:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG338]] to <2 x double>
-// CHECK-NEXT: store <2 x double> [[REG339]], <2 x double>* [[REG340:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG341:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG340]], align 16
-// CHECK-NEXT: [[REG342:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG341]], i32 0
-// CHECK-NEXT: [[REG343:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG326]], align 16
-// CHECK-NEXT: [[REG344:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG343]], i32 1
-// CHECK-NEXT: [[REG345:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG342]], double noundef [[REG344]])
-// CHECK-NEXT: ret <2 x double> [[REG345]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpneq_pd(<2 x double> noundef [[REG346:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG347:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG346]], <2 x double>* [[REG348:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG347]], <2 x double>* [[REG349:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG350:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG348]], align 16
-// CHECK-NEXT: [[REG351:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG349]], align 16
-// CHECK-NEXT: [[REG352:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])(<2 x double> noundef [[REG350]], <2 x double> noundef [[REG351]])
-// CHECK-NEXT: [[REG353:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG352]] to <2 x double>
-// CHECK-NEXT: store <2 x double> [[REG353]], <2 x double>* [[REG354:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG355:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG354]], align 16
-// CHECK-NEXT: [[REG356:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG354]], align 16
-// CHECK-NEXT: [[REG357:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_nor(double vector[2], double vector[2])(<2 x double> noundef [[REG355]], <2 x double> noundef [[REG356]])
-// CHECK-NEXT: ret <2 x double> [[REG357]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpneq_sd(<2 x double> noundef [[REG358:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG359:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG358]], <2 x double>* [[REG360:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG359]], <2 x double>* [[REG361:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG362:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG360]], align 16
-// CHECK-NEXT: [[REG363:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG362]], i32 0
-// CHECK-NEXT: [[REG364:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG363]])
-// CHECK-NEXT: store <2 x double> [[REG364]], <2 x double>* [[REG365:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG366:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG361]], align 16
-// CHECK-NEXT: [[REG367:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG366]], i32 0
-// CHECK-NEXT: [[REG368:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG367]])
-// CHECK-NEXT: store <2 x double> [[REG368]], <2 x double>* [[REG369:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG370:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG365]], align 16
-// CHECK-NEXT: [[REG371:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG369]], align 16
-// CHECK-NEXT: [[REG372:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])(<2 x double> noundef [[REG370]], <2 x double> noundef [[REG371]])
-// CHECK-NEXT: [[REG373:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG372]] to <2 x double>
-// CHECK-NEXT: store <2 x double> [[REG373]], <2 x double>* [[REG374:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG375:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG374]], align 16
-// CHECK-NEXT: [[REG376:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG374]], align 16
-// CHECK-NEXT: [[REG377:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_nor(double vector[2], double vector[2])(<2 x double> noundef [[REG375]], <2 x double> noundef [[REG376]])
-// CHECK-NEXT: store <2 x double> [[REG377]], <2 x double>* [[REG374]], align 16
-// CHECK-NEXT: [[REG378:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG374]], align 16
-// CHECK-NEXT: [[REG379:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG378]], i32 0
-// CHECK-NEXT: [[REG380:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG360]], align 16
-// CHECK-NEXT: [[REG381:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG380]], i32 1
-// CHECK-NEXT: [[REG382:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG379]], double noundef [[REG381]])
-// CHECK-NEXT: ret <2 x double> [[REG382]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpnge_pd
-// CHECK: [[REG383:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
-// CHECK-NEXT: [[REG384:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG383]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG384]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpnge_sd(<2 x double> noundef [[REG385:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG386:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG385]], <2 x double>* [[REG387:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG386]], <2 x double>* [[REG388:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG389:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG387]], align 16
-// CHECK-NEXT: [[REG390:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG389]], i32 0
-// CHECK-NEXT: [[REG391:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG390]])
-// CHECK-NEXT: store <2 x double> [[REG391]], <2 x double>* [[REG392:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG393:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG388]], align 16
-// CHECK-NEXT: [[REG394:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG393]], i32 0
-// CHECK-NEXT: [[REG395:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG394]])
-// CHECK-NEXT: store <2 x double> [[REG395]], <2 x double>* [[REG396:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG397:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG392]], align 16
-// CHECK-NEXT: [[REG398:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG396]], align 16
-// CHECK-NEXT: [[REG399:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmplt(double vector[2], double vector[2])(<2 x double> noundef [[REG397]], <2 x double> noundef [[REG398]])
-// CHECK-NEXT: [[REG400:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG399]] to <2 x double>
-// CHECK-NEXT: store <2 x double> [[REG400]], <2 x double>* [[REG401:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG402:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG401]], align 16
-// CHECK-NEXT: [[REG403:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG402]], i32 0
-// CHECK-NEXT: [[REG404:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG387]], align 16
-// CHECK-NEXT: [[REG405:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG404]], i32 1
-// CHECK-NEXT: [[REG406:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG403]], double noundef [[REG405]])
-// CHECK-NEXT: ret <2 x double> [[REG406]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpngt_pd(<2 x double> noundef [[REG407:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG408:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG407]], <2 x double>* [[REG409:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG408]], <2 x double>* [[REG410:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG411:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG409]], align 16
-// CHECK-NEXT: [[REG412:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG410]], align 16
-// CHECK-NEXT: [[REG413:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmple(double vector[2], double vector[2])(<2 x double> noundef [[REG411]], <2 x double> noundef [[REG412]])
-// CHECK-NEXT: [[REG414:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG413]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG414]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpngt_sd(<2 x double> noundef [[REG415:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG416:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG415]], <2 x double>* [[REG417:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG416]], <2 x double>* [[REG418:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG419:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG417]], align 16
-// CHECK-NEXT: [[REG420:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG419]], i32 0
-// CHECK-NEXT: [[REG421:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG420]])
-// CHECK-NEXT: store <2 x double> [[REG421]], <2 x double>* [[REG422:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG423:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG418]], align 16
-// CHECK-NEXT: [[REG424:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG423]], i32 0
-// CHECK-NEXT: [[REG425:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG424]])
-// CHECK-NEXT: store <2 x double> [[REG425]], <2 x double>* [[REG426:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG427:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG422]], align 16
-// CHECK-NEXT: [[REG428:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG426]], align 16
-// CHECK-NEXT: [[REG429:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmple(double vector[2], double vector[2])(<2 x double> noundef [[REG427]], <2 x double> noundef [[REG428]])
-// CHECK-NEXT: [[REG430:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG429]] to <2 x double>
-// CHECK-NEXT: store <2 x double> [[REG430]], <2 x double>* [[REG431:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG432:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG431]], align 16
-// CHECK-NEXT: [[REG433:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG432]], i32 0
-// CHECK-NEXT: [[REG434:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG417]], align 16
-// CHECK-NEXT: [[REG435:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG434]], i32 1
-// CHECK-NEXT: [[REG436:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG433]], double noundef [[REG435]])
-// CHECK-NEXT: ret <2 x double> [[REG436]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpnle_pd
-// CHECK: [[REG437:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpgt(double vector[2], double vector[2])
-// CHECK-NEXT: [[REG438:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG437]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG438]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpnle_sd(<2 x double> noundef [[REG439:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG440:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG439]], <2 x double>* [[REG441:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG440]], <2 x double>* [[REG442:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG443:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG441]], align 16
-// CHECK-NEXT: [[REG444:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG443]], i32 0
-// CHECK-NEXT: [[REG445:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG444]])
-// CHECK-NEXT: store <2 x double> [[REG445]], <2 x double>* [[REG446:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG447:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG442]], align 16
-// CHECK-NEXT: [[REG448:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG447]], i32 0
-// CHECK-NEXT: [[REG449:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG448]])
-// CHECK-NEXT: store <2 x double> [[REG449]], <2 x double>* [[REG450:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG451:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG446]], align 16
-// CHECK-NEXT: [[REG452:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG450]], align 16
-// CHECK-NEXT: [[REG453:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpge(double vector[2], double vector[2])(<2 x double> noundef [[REG451]], <2 x double> noundef [[REG452]])
-// CHECK-NEXT: [[REG454:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG453]] to <2 x double>
-// CHECK-NEXT: store <2 x double> [[REG454]], <2 x double>* [[REG455:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG456:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG455]], align 16
-// CHECK-NEXT: [[REG457:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG456]], i32 0
-// CHECK-NEXT: [[REG458:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG441]], align 16
-// CHECK-NEXT: [[REG459:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG458]], i32 1
-// CHECK-NEXT: [[REG460:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG457]], double noundef [[REG459]])
-// CHECK-NEXT: ret <2 x double> [[REG460]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpnlt_pd
-// CHECK: [[REG461:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
-// CHECK-NEXT: [[REG462:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG461]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG462]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpnlt_sd(<2 x double> noundef [[REG463:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG464:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG463]], <2 x double>* [[REG465:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG464]], <2 x double>* [[REG466:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG467:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG465]], align 16
-// CHECK-NEXT: [[REG468:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG467]], i32 0
-// CHECK-NEXT: [[REG469:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG468]])
-// CHECK-NEXT: store <2 x double> [[REG469]], <2 x double>* [[REG470:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG471:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG466]], align 16
-// CHECK-NEXT: [[REG472:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG471]], i32 0
-// CHECK-NEXT: [[REG473:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG472]])
-// CHECK-NEXT: store <2 x double> [[REG473]], <2 x double>* [[REG474:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG475:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG470]], align 16
-// CHECK-NEXT: [[REG476:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG474]], align 16
-// CHECK-NEXT: [[REG477:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpge(double vector[2], double vector[2])(<2 x double> noundef [[REG475]], <2 x double> noundef [[REG476]])
-// CHECK-NEXT: [[REG478:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG477]] to <2 x double>
-// CHECK-NEXT: store <2 x double> [[REG478]], <2 x double>* [[REG479:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG480:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG479]], align 16
-// CHECK-NEXT: [[REG481:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG480]], i32 0
-// CHECK-NEXT: [[REG482:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG465]], align 16
-// CHECK-NEXT: [[REG483:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG482]], i32 1
-// CHECK-NEXT: [[REG484:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG481]], double noundef [[REG483]])
-// CHECK-NEXT: ret <2 x double> [[REG484]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpord_pd(<2 x double> noundef [[REG485:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG486:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG485]], <2 x double>* [[REG487:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG486]], <2 x double>* [[REG488:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG489:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG487]], align 16
-// CHECK-NEXT: [[REG490:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG487]], align 16
-// CHECK-NEXT: [[REG491:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])(<2 x double> noundef [[REG489]], <2 x double> noundef [[REG490]])
-// CHECK-NEXT: store <2 x i64> [[REG491]], <2 x i64>* [[REG492:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG493:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG488]], align 16
-// CHECK-NEXT: [[REG494:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG488]], align 16
-// CHECK-NEXT: [[REG495:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])(<2 x double> noundef [[REG493]], <2 x double> noundef [[REG494]])
-// CHECK-NEXT: store <2 x i64> [[REG495]], <2 x i64>* [[REG496:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG497:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG492]], align 16
-// CHECK-NEXT: [[REG498:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG496]], align 16
-// CHECK-NEXT: [[REG499:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_and(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef [[REG497]], <2 x i64> noundef [[REG498]])
-// CHECK-NEXT: [[REG500:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG499]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG500]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpord_sd(<2 x double> noundef [[REG501:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG502:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG501]], <2 x double>* [[REG503:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG502]], <2 x double>* [[REG504:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG505:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG503]], align 16
-// CHECK-NEXT: [[REG506:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG505]], i32 0
-// CHECK-NEXT: [[REG507:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG506]])
-// CHECK-NEXT: [[REG508:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG504]], align 16
-// CHECK-NEXT: [[REG509:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG508]], i32 0
-// CHECK-NEXT: [[REG510:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG509]])
-// CHECK-NEXT: [[REG511:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_cmpord_pd(<2 x double> noundef [[REG507]], <2 x double> noundef [[REG510]])
-// CHECK-NEXT: store <2 x double> [[REG511]], <2 x double>* [[REG512:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG513:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG512]], align 16
-// CHECK-NEXT: [[REG514:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG513]], i32 0
-// CHECK-NEXT: [[REG515:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG503]], align 16
-// CHECK-NEXT: [[REG516:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG515]], i32 1
-// CHECK-NEXT: [[REG517:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG514]], double noundef [[REG516]])
-// CHECK-NEXT: ret <2 x double> [[REG517]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpunord_pd(<2 x double> noundef [[REG518:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG519:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG518]], <2 x double>* [[REG520:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG519]], <2 x double>* [[REG521:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG522:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG520]], align 16
-// CHECK-NEXT: [[REG523:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG520]], align 16
-// CHECK-NEXT: [[REG524:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])(<2 x double> noundef [[REG522]], <2 x double> noundef [[REG523]])
-// CHECK-NEXT: store <2 x i64> [[REG524]], <2 x i64>* [[REG525:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG526:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG521]], align 16
-// CHECK-NEXT: [[REG527:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG521]], align 16
-// CHECK-NEXT: [[REG528:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])(<2 x double> noundef [[REG526]], <2 x double> noundef [[REG527]])
-// CHECK-NEXT: store <2 x i64> [[REG528]], <2 x i64>* [[REG529:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG530:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG525]], align 16
-// CHECK-NEXT: [[REG531:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG525]], align 16
-// CHECK-NEXT: [[REG532:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_nor(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef [[REG530]], <2 x i64> noundef [[REG531]])
-// CHECK-NEXT: store <2 x i64> [[REG532]], <2 x i64>* [[REG525]], align 16
-// CHECK-NEXT: [[REG533:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG525]], align 16
-// CHECK-NEXT: [[REG534:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG529]], align 16
-// CHECK-NEXT: [[REG535:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_orc(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef [[REG533]], <2 x i64> noundef [[REG534]])
-// CHECK-NEXT: [[REG536:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG535]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG536]]
-
-// CHECK: define available_externally <2 x double> @_mm_cmpunord_sd(<2 x double> noundef [[REG537:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG538:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG537]], <2 x double>* [[REG539:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG538]], <2 x double>* [[REG540:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG541:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG539]], align 16
-// CHECK-NEXT: [[REG542:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG541]], i32 0
-// CHECK-NEXT: [[REG543:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG542]])
-// CHECK-NEXT: [[REG544:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG540]], align 16
-// CHECK-NEXT: [[REG545:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG544]], i32 0
-// CHECK-NEXT: [[REG546:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG545]])
-// CHECK-NEXT: [[REG547:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_cmpunord_pd(<2 x double> noundef [[REG543]], <2 x double> noundef [[REG546]])
-// CHECK-NEXT: store <2 x double> [[REG547]], <2 x double>* [[REG548:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG549:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG548]], align 16
-// CHECK-NEXT: [[REG550:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG549]], i32 0
-// CHECK-NEXT: [[REG551:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG539]], align 16
-// CHECK-NEXT: [[REG552:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG551]], i32 1
-// CHECK-NEXT: [[REG553:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG550]], double noundef [[REG552]])
-// CHECK-NEXT: ret <2 x double> [[REG553]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpeq_epi32
+// CHECK: call <4 x i32> @vec_cmpeq(int vector[4], int vector[4])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpeq_epi16
+// CHECK: call <8 x i16> @vec_cmpeq(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpeq_epi8
+// CHECK: call <16 x i8> @vec_cmpeq(signed char vector[16], signed char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpgt_epi32
+// CHECK: call <4 x i32> @vec_cmpgt(int vector[4], int vector[4])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpgt_epi16
+// CHECK: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpgt_epi8
+// CHECK: call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cmplt_epi32
+// CHECK: call <4 x i32> @vec_cmplt(int vector[4], int vector[4])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cmplt_epi16
+// CHECK: call <8 x i16> @vec_cmplt(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cmplt_epi8
+// CHECK: call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpeq_pd
+// CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpeq_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpge_pd
+// CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpge_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpgt_pd
+// CHECK: call <2 x i64> @vec_cmpgt(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpgt_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x i64> @vec_cmpgt(double vector[2], double vector[2])
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmple_pd
+// CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmple_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2])
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmplt_pd
+// CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmplt_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpneq_pd
+// CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
+// CHECK: call <2 x double> @vec_nor(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpneq_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
+// CHECK: call <2 x double> @vec_nor(double vector[2], double vector[2])
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnge_pd
+// CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnge_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpngt_pd
+// CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpngt_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2])
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnle_pd
+// CHECK: call <2 x i64> @vec_cmpgt(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnle_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnlt_pd
+// CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnlt_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpord_pd
+// CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
+// CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
+// CHECK: call <2 x i64> @vec_and(unsigned long long vector[2], unsigned long long vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpord_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @_mm_cmpord_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpunord_pd
+// CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
+// CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
+// CHECK: call <2 x i64> @vec_nor(unsigned long long vector[2], unsigned long long vector[2])
+// CHECK: call <2 x i64> @vec_orc(unsigned long long vector[2], unsigned long long vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cmpunord_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @_mm_cmpunord_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
 
 void __attribute__((noinline))
 test_comi() {
@@ -797,41 +347,29 @@ test_comi() {
 
 // CHECK-LABEL: @test_comi
 
-// CHECK: define available_externally signext i32 @_mm_comieq_sd(<2 x double> noundef [[REG554:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG555:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG554]], <2 x double>* [[REG556:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG555]], <2 x double>* [[REG557:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG558:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG556]], align 16
-// CHECK-NEXT: [[REG559:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG558]], i32 0
-// CHECK-NEXT: [[REG560:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG557]], align 16
-// CHECK-NEXT: [[REG561:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG560]], i32 0
-// CHECK-NEXT: [[REG562:[0-9a-zA-Z_%.]+]] = fcmp oeq double [[REG559]], [[REG561]]
-// CHECK-NEXT: [[REG563:[0-9a-zA-Z_%.]+]] = zext i1 [[REG562]] to i32
-// CHECK-NEXT: ret i32 [[REG563]]
-
-// CHECK: define available_externally signext i32 @_mm_comige_sd
-// CHECK: [[REG564:[0-9a-zA-Z_%.]+]] = fcmp oge double {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG565:[0-9a-zA-Z_%.]+]] = zext i1 [[REG564]] to i32
-// CHECK-NEXT: ret i32 [[REG565]]
-
-// CHECK: define available_externally signext i32 @_mm_comigt_sd
-// CHECK: [[REG566:[0-9a-zA-Z_%.]+]] = fcmp ogt double {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG567:[0-9a-zA-Z_%.]+]] = zext i1 [[REG566]] to i32
-// CHECK-NEXT: ret i32 [[REG567]]
-
-// CHECK: define available_externally signext i32 @_mm_comile_sd
-// CHECK: [[REG568:[0-9a-zA-Z_%.]+]] = fcmp ole double {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG569:[0-9a-zA-Z_%.]+]] = zext i1 [[REG568]] to i32
-// CHECK-NEXT: ret i32 [[REG569]]
-
-// CHECK: define available_externally signext i32 @_mm_comilt_sd
-// CHECK: [[REG570:[0-9a-zA-Z_%.]+]] = fcmp olt double {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG571:[0-9a-zA-Z_%.]+]] = zext i1 [[REG570]] to i32
-// CHECK-NEXT: ret i32 [[REG571]]
-
-// CHECK: define available_externally signext i32 @_mm_comineq_sd
-// CHECK: [[REG572:[0-9a-zA-Z_%.]+]] = fcmp une double {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG573:[0-9a-zA-Z_%.]+]] = zext i1 [[REG572]] to i32
-// CHECK-NEXT: ret i32 [[REG573]]
+// CHECK-LABEL: define available_externally signext i32 @_mm_comieq_sd
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oeq double
+// CHECK: zext i1 %[[CMP]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_comige_sd
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oge double
+// CHECK: zext i1 %[[CMP]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_comigt_sd
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ogt double
+// CHECK: zext i1 %[[CMP]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_comile_sd
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ole double
+// CHECK: zext i1 %[[CMP]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_comilt_sd
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp olt double
+// CHECK: zext i1 %[[CMP]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_comineq_sd
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp une double
+// CHECK: zext i1 %[[CMP]] to i32
 
 void __attribute__((noinline))
 test_control() {
@@ -843,24 +381,17 @@ test_control() {
 
 // CHECK-LABEL: @test_control
 
-// CHECK: define available_externally void @_mm_clflush(i8* noundef [[REG574:[0-9a-zA-Z_%.]+]])
-// CHECK: store i8* [[REG574]], i8** [[REG575:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG576:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG575]], align 8
-// CHECK-NEXT: call void asm sideeffect "dcbf 0,$0", "b,~{memory}"(i8* [[REG576]])
-// CHECK-NEXT: ret void
+// CHECK-LABEL: define available_externally void @_mm_clflush
+// CHECK: call void asm sideeffect "dcbf 0,$0", "b,~{memory}"(i8* %{{[0-9a-zA-Z_.]+}})
 
-// CHECK: define available_externally void @_mm_lfence()
+// CHECK-LABEL: define available_externally void @_mm_lfence()
 // CHECK: fence release
-// CHECK-NEXT: ret void
 
-// CHECK: define available_externally void @_mm_mfence()
+// CHECK-LABEL: define available_externally void @_mm_mfence()
 // CHECK: fence seq_cst
-// CHECK-NEXT: ret void
 
-// CHECK: define available_externally void @_mm_pause()
-// CHECK: [[REG577:[0-9a-zA-Z_%.]+]] = call i64 asm sideeffect "\09mfppr\09$0;   or 31,31,31;   isync;   lwsync;   isync;   mtppr\09$0;", "=r,~{memory}"()
-// CHECK-NEXT: store i64 [[REG577]], i64* [[REG578:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: ret void
+// CHECK-LABEL: define available_externally void @_mm_pause()
+// CHECK: call i64 asm sideeffect "\09mfppr\09$0;   or 31,31,31;   isync;   lwsync;   isync;   mtppr\09$0;", "=r,~{memory}"()
 
 void __attribute__((noinline))
 test_converts() {
@@ -897,375 +428,125 @@ test_converts() {
 
 // CHECK-LABEL: @test_converts
 
-// CHECK: define available_externally <2 x double> @_mm_cvtepi32_pd(<2 x i64> noundef [[REG579:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG579]], <2 x i64>* [[REG580:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG581:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG580]], align 16
-// CHECK-NEXT: [[REG582:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG581]] to <4 x i32>
-// CHECK-NEXT: [[REG583:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_unpackh(int vector[4])(<4 x i32> noundef [[REG582]])
-// CHECK-NEXT: store <2 x i64> [[REG583]], <2 x i64>* [[REG584:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG585:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG584]], align 16
-// CHECK-NEXT: [[REG586:[0-9a-zA-Z_%.]+]] = sitofp <2 x i64> [[REG585]] to <2 x double>
-// CHECK-NEXT: [[REG587:[0-9a-zA-Z_%.]+]] = fmul <2 x double> [[REG586]], <double 1.000000e+00, double 1.000000e+00>
-// CHECK-NEXT: ret <2 x double> [[REG587]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtepi32_ps(<2 x i64> noundef [[REG588:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG588]], <2 x i64>* [[REG589:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG590:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG589]], align 16
-// CHECK-NEXT: [[REG591:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG590]] to <4 x i32>
-// CHECK-NEXT: [[REG592:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG591]], i32 0)
-// CHECK-NEXT: ret <4 x float> [[REG592]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cvtpd_epi32(<2 x double> noundef [[REG593:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG593]], <2 x double>* [[REG594:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: %[[REG595:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG594]], align 16
-// CHECK-NEXT: [[REG596:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_rint(double vector[2])(<2 x double> noundef %[[REG595:[0-9a-zA-Z_%.]+]])
-// CHECK-NEXT: store <2 x double> [[REG596]], <2 x double>* [[REG597:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[REG598:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG599:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG597]], align 16
-// CHECK-NEXT: [[REG600:[0-9a-zA-Z_%.]+]] = call <4 x i32> asm "xvcvdpsxws ${0:x},${1:x}", "=^wa,^wa"(<2 x double> [[REG599]])
-// CHECK-NEXT: store <4 x i32> [[REG600]], <4 x i32>* [[REG601:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG602:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG601]], align 16
-// CHECK-NEXT: [[REG603:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG601]], align 16
-// CHECK-NEXT: [[REG604:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mergeo(int vector[4], int vector[4])(<4 x i32> noundef [[REG602]], <4 x i32> noundef [[REG603]])
-// CHECK-NEXT: store <4 x i32> [[REG604]], <4 x i32>* [[REG601]], align 16
-// CHECK-NEXT: [[REG605:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG601]], align 16
-// CHECK-NEXT: [[REG606:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG605]] to <2 x i64>
-// CHECK-NEXT: [[REG607:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef [[REG606]], <2 x i64> noundef zeroinitializer)
-// CHECK-NEXT: store <4 x i32> [[REG607]], <4 x i32>* [[REG608:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG609:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG608]], align 16
-// CHECK-NEXT: [[REG610:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG609]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG610]]
-
-// CHECK: define available_externally i64 @_mm_cvtpd_pi32(<2 x double> noundef [[REG611:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG611]], <2 x double>* [[REG612:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG613:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG612]], align 16
-// CHECK-NEXT: [[REG614:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_cvtpd_epi32(<2 x double> noundef [[REG613]])
-// CHECK-NEXT: store <2 x i64> [[REG614]], <2 x i64>* [[REG615:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG616:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG615]], align 16
-// CHECK-NEXT: [[REG617:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG616]], i32 0
-// CHECK-NEXT: ret i64 [[REG617]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtpd_ps(<2 x double> noundef [[REG618:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG618]], <2 x double>* [[REG619:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[REG620:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: %[[REG621:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG619]], align 16
-// CHECK-NEXT: [[REG622:[0-9a-zA-Z_%.]+]] = call <4 x i32> asm "xvcvdpsp ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %[[REG621:[0-9a-zA-Z_%.]+]])
-// CHECK-NEXT: store <4 x i32> [[REG622]], <4 x i32>* [[REG623:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG624:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG623]], align 16
-// CHECK-NEXT: [[REG625:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG623]], align 16
-// CHECK-NEXT: [[REG626:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mergeo(int vector[4], int vector[4])(<4 x i32> noundef [[REG624]], <4 x i32> noundef [[REG625]])
-// CHECK-NEXT: store <4 x i32> [[REG626]], <4 x i32>* [[REG623]], align 16
-// CHECK-NEXT: [[REG627:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG623]], align 16
-// CHECK-NEXT: [[REG628:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG627]] to <2 x i64>
-// CHECK-NEXT: [[REG629:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef [[REG628]], <2 x i64> noundef zeroinitializer)
-// CHECK-NEXT: [[REG630:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG629]] to <4 x float>
-// CHECK-NEXT: store <4 x float> [[REG630]], <4 x float>* [[REG631:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG632:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG631]], align 16
-// CHECK-NEXT: ret <4 x float> [[REG632]]
-
-// CHECK: define available_externally <2 x double> @_mm_cvtpi32_pd(i64 noundef [[REG633:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG633]], i64* [[REG634:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG635:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG634]], align 8
-// CHECK-NEXT: [[REG636:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG635]])
-// CHECK-NEXT: [[REG637:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG636]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG637]], <4 x i32>* [[REG638:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG639:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG638]], align 16
-// CHECK-NEXT: [[REG640:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_unpackl(int vector[4])(<4 x i32> noundef [[REG639]])
-// CHECK-NEXT: store <2 x i64> [[REG640]], <2 x i64>* [[REG641:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG642:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG641]], align 16
-// CHECK-NEXT: [[REG643:[0-9a-zA-Z_%.]+]] = sitofp <2 x i64> [[REG642]] to <2 x double>
-// CHECK-NEXT: [[REG644:[0-9a-zA-Z_%.]+]] = fmul <2 x double> [[REG643]], <double 1.000000e+00, double 1.000000e+00>
-// CHECK-NEXT: store <2 x double> [[REG644]], <2 x double>* [[REG645:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG646:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG645]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG646]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cvtps_epi32(<4 x float> noundef [[REG647:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG647]], <4 x float>* [[REG648:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG649:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG648]], align 16
-// CHECK-NEXT: [[REG650:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> noundef [[REG649]])
-// CHECK-NEXT: store <4 x float> [[REG650]], <4 x float>* [[REG651:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG652:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG651]], align 16
-// CHECK-NEXT: [[REG653:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG652]], i32 0)
-// CHECK-NEXT: store <4 x i32> [[REG653]], <4 x i32>* [[REG654:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG655:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG654]], align 16
-// CHECK-NEXT: [[REG656:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG655]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG656]]
-
-// CHECK: define available_externally <2 x double> @_mm_cvtps_pd(<4 x float> noundef [[REG657:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG657]], <4 x float>* [[REG658:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: %[[REG659:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG658]], align 16
-// CHECK-NEXT: store <4 x float> %[[REG659:[0-9a-zA-Z_%.]+]], <4 x float>* [[REG660:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG661:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG660]], align 16
-// CHECK-NEXT: [[REG662:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG660]], align 16
-
-// CHECK-BE-NEXT: [[REG663:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])(<4 x float> noundef [[REG664:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG665:[0-9a-zA-Z_%.]+]])
-// CHECK-BE-NEXT: store <4 x float> [[REG663]], <4 x float>* [[REG666:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-BE-NEXT: [[REG667:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG666]], align 16
-// CHECK-BE-NEXT: [[REG668:[0-9a-zA-Z_%.]+]] = call <2 x double> asm " xvcvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> [[REG667]])
-// CHECK-BE-NEXT: store <2 x double> [[REG668]], <2 x double>* [[REG669:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-BE-NEXT: [[REG670:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG669]], align 16
-// CHECK-BE-NEXT: ret <2 x double> [[REG670]]
-
-// CHECK-LE: [[REG671:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG661]] to <4 x i32>
-// CHECK-LE-NEXT: [[REG672:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG662]] to <4 x i32>
-// CHECK-LE-NEXT: [[REG673:[0-9a-zA-Z_%.]+]] = shufflevector <4 x i32> [[REG671]], <4 x i32> [[REG672]], <4 x i32> <i32 5, i32 6, i32 7, i32 0>
-// CHECK-LE-NEXT: [[REG674:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG673]] to <4 x float>
-// CHECK-LE-NEXT: store <4 x float> [[REG674]], <4 x float>* [[REG666:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-LE-NEXT: [[REG675:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG660]], align 16
-// CHECK-LE-NEXT: [[REG676:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG666]], align 16
-// CHECK-LE-NEXT: [[REG677:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG675]] to <4 x i32>
-// CHECK-LE-NEXT: [[REG678:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG676]] to <4 x i32>
-// CHECK-LE-NEXT: [[REG679:[0-9a-zA-Z_%.]+]] = shufflevector <4 x i32> [[REG677]], <4 x i32> [[REG678]], <4 x i32> <i32 6, i32 7, i32 0, i32 1>
-// CHECK-LE-NEXT: [[REG680:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG679]] to <4 x float>
-// CHECK-LE-NEXT: store <4 x float> [[REG680]], <4 x float>* [[REG666]], align 16
-// CHECK-LE-NEXT: [[REG681:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG666]], align 16
-// CHECK-LE-NEXT: [[REG682:[0-9a-zA-Z_%.]+]] = call <2 x double> asm " xvcvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> [[REG681]])
-// CHECK-LE-NEXT: store <2 x double> [[REG682]], <2 x double>* [[REG669:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-LE-NEXT: [[REG683:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG669]], align 16
-// CHECK-LE-NEXT: ret <2 x double> [[REG683]]
-
-// CHECK: define available_externally double @_mm_cvtsd_f64(<2 x double> noundef [[REG684:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG684]], <2 x double>* [[REG685:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG686:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG685]], align 16
-// CHECK-NEXT: [[REG687:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG686]], i32 0
-// CHECK-NEXT: ret double [[REG687]]
-
-// CHECK: define available_externally signext i32 @_mm_cvtsd_si32(<2 x double> noundef [[REG688:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG688]], <2 x double>* [[REG689:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG690:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG689]], align 16
-// CHECK-NEXT: [[REG691:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_rint(double vector[2])(<2 x double> noundef [[REG690]])
-// CHECK-NEXT: store <2 x double> [[REG691]], <2 x double>* [[REG692:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG693:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG692]], align 16
-// CHECK-NEXT: [[REG694:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG693]], i32 0
-// CHECK-NEXT: [[REG695:[0-9a-zA-Z_%.]+]] = fptosi double [[REG694]] to i32
-// CHECK-NEXT: store i32 [[REG695]], i32* [[REG696:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG697:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG696]], align 4
-// CHECK-NEXT: ret i32 [[REG697]]
-
-// CHECK: define available_externally i64 @_mm_cvtsd_si64(<2 x double> noundef [[REG698:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG698]], <2 x double>* [[REG699:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG700:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG699]], align 16
-// CHECK-NEXT: [[REG701:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_rint(double vector[2])(<2 x double> noundef [[REG700]])
-// CHECK-NEXT: store <2 x double> [[REG701]], <2 x double>* [[REG702:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG703:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG702]], align 16
-// CHECK-NEXT: [[REG704:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG703]], i32 0
-// CHECK-NEXT: [[REG705:[0-9a-zA-Z_%.]+]] = fptosi double [[REG704]] to i64
-// CHECK-NEXT: store i64 [[REG705]], i64* [[REG706:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG707:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG706]], align 8
-// CHECK-NEXT: ret i64 [[REG707]]
-
-// CHECK: define available_externally i64 @_mm_cvtsd_si64x(<2 x double> noundef [[REG708:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG708]], <2 x double>* [[REG709:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG710:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG709]], align 16
-// CHECK-NEXT: [[REG711:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvtsd_si64(<2 x double> noundef [[REG710]])
-// CHECK-NEXT: ret i64 [[REG711]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtsd_ss(<4 x float> noundef [[REG712:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG713:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG712]], <4 x float>* [[REG714:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG713]], <2 x double>* [[REG715:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: %[[REG716:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG714]], align 16
-// CHECK-NEXT: store <4 x float> %[[REG716:[0-9a-zA-Z_%.]+]], <4 x float>* [[REG717:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG718:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG715]], align 16
-
-// CHECK-LE-NEXT: [[REG719:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splat(double vector[2], unsigned int)(<2 x double> noundef [[REG718]], i32 noundef zeroext 0)
-// CHECK-LE-NEXT: store <2 x double> [[REG719]], <2 x double>* [[REG720:[0-9a-zA-Z_%.]+]], align 16
-
-// CHECK-BE-NEXT: [[REG721:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG718]], i32 0
-// CHECK-BE-NEXT: [[REG722:[0-9a-zA-Z_%.]+]] = fptrunc double [[REG721]] to float
-
-// CHECK-NEXT: [[REG723:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG717]], align 16
-
-// CHECK-BE-NEXT: [[REG724:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG723]], float [[REG722]], i32 0
-// CHECK-BE-NEXT: store <4 x float> [[REG724]], <4 x float>* [[REG717]], align 16
-
-// CHECK-NEXT: [[REG725:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG717]], align 16
-
-// CHECK-BE-NEXT: ret <4 x float> [[REG725]]
-
-// CHECK-LE-NEXT: [[REG726:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG723]] to <4 x i32>
-// CHECK-LE-NEXT: [[REG727:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG725]] to <4 x i32>
-// CHECK-LE-NEXT: [[REG728:[0-9a-zA-Z_%.]+]] = shufflevector <4 x i32> [[REG726]], <4 x i32> [[REG727]], <4 x i32> <i32 5, i32 6, i32 7, i32 0>
-// CHECK-LE-NEXT: [[REG729:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG728]] to <4 x float>
-// CHECK-LE-NEXT: store <4 x float> [[REG729]], <4 x float>* [[REG717]], align 16
-// CHECK-LE-NEXT: [[REG730:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG720]], align 16
-// CHECK-LE-NEXT: [[REG731:[0-9a-zA-Z_%.]+]] = call <4 x float> asm "xscvdpsp ${0:x},${1:x}", "=^wa,^wa"(<2 x double> [[REG730]])
-// CHECK-LE-NEXT: store <4 x float> [[REG731]], <4 x float>* [[REG732:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-LE-NEXT: [[REG733:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG717]], align 16
-// CHECK-LE-NEXT: [[REG734:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG732]], align 16
-// CHECK-LE-NEXT: [[REG735:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG733]] to <4 x i32>
-// CHECK-LE-NEXT: [[REG736:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG734]] to <4 x i32>
-// CHECK-LE-NEXT: [[REG737:[0-9a-zA-Z_%.]+]] = shufflevector <4 x i32> [[REG735]], <4 x i32> [[REG736]], <4 x i32> <i32 7, i32 0, i32 1, i32 2>
-// CHECK-LE-NEXT: [[REG738:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG737]] to <4 x float>
-// CHECK-LE-NEXT: store <4 x float> [[REG738]], <4 x float>* [[REG717]], align 16
-// CHECK-LE-NEXT: [[REG739:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG717]], align 16
-// CHECK-LE-NEXT: ret <4 x float> [[REG739]]
-
-// CHECK: define available_externally signext i32 @_mm_cvtsi128_si32(<2 x i64> noundef [[REG740:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG740]], <2 x i64>* [[REG741:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG742:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG741]], align 16
-// CHECK-NEXT: [[REG743:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG742]] to <4 x i32>
-// CHECK-NEXT: [[REG744:[0-9a-zA-Z_%.]+]] = extractelement <4 x i32> [[REG743]], i32 0
-// CHECK-NEXT: ret i32 [[REG744]]
-
-// CHECK: define available_externally i64 @_mm_cvtsi128_si64(<2 x i64> noundef [[REG745:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG745]], <2 x i64>* [[REG746:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG747:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG746]], align 16
-// CHECK-NEXT: [[REG748:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG747]], i32 0
-// CHECK-NEXT: ret i64 [[REG748]]
-
-// CHECK: define available_externally i64 @_mm_cvtsi128_si64x(<2 x i64> noundef [[REG749:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG749]], <2 x i64>* [[REG750:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG751:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG750]], align 16
-// CHECK-NEXT: [[REG752:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG751]], i32 0
-// CHECK-NEXT: ret i64 [[REG752]]
-
-// CHECK: define available_externally <2 x double> @_mm_cvtsi32_sd(<2 x double> noundef [[REG753:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG754:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG753]], <2 x double>* [[REG755:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG754]], i32* [[REG756:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG757:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG755]], align 16
-// CHECK-NEXT: store <2 x double> [[REG757]], <2 x double>* [[REG758:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG759:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG756]], align 4
-// CHECK-NEXT: [[REG760:[0-9a-zA-Z_%.]+]] = sitofp i32 [[REG759]] to double
-// CHECK-NEXT: store double [[REG760]], double* [[REG761:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG762:[0-9a-zA-Z_%.]+]] = load double, double* [[REG761]], align 8
-// CHECK-NEXT: [[REG763:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG758]], align 16
-// CHECK-NEXT: [[REG764:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG763]], double [[REG762]], i32 0
-// CHECK-NEXT: store <2 x double> [[REG764]], <2 x double>* [[REG758]], align 16
-// CHECK-NEXT: [[REG765:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG758]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG765]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cvtsi32_si128(i32 noundef signext [[REG766:[0-9a-zA-Z_%.]+]])
-// CHECK: store i32 [[REG766]], i32* [[REG767:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG768:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG767]], align 4
-// CHECK-NEXT: [[REG769:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi32(i32 noundef signext 0, i32 noundef signext 0, i32 noundef signext 0, i32 noundef signext [[REG768]])
-// CHECK-NEXT: ret <2 x i64> [[REG769]]
-
-// CHECK: define available_externally <2 x double> @_mm_cvtsi64_sd(<2 x double> noundef [[REG770:[0-9a-zA-Z_%.]+]], i64 noundef [[REG771:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG770]], <2 x double>* [[REG772:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i64 [[REG771]], i64* [[REG773:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG774:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG772]], align 16
-// CHECK-NEXT: store <2 x double> [[REG774]], <2 x double>* [[REG775:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG776:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG773]], align 8
-// CHECK-NEXT: [[REG777:[0-9a-zA-Z_%.]+]] = sitofp i64 [[REG776]] to double
-// CHECK-NEXT: store double [[REG777]], double* [[REG778:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG779:[0-9a-zA-Z_%.]+]] = load double, double* [[REG778]], align 8
-// CHECK-NEXT: [[REG780:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG775]], align 16
-// CHECK-NEXT: [[REG781:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG780]], double [[REG779]], i32 0
-// CHECK-NEXT: store <2 x double> [[REG781]], <2 x double>* [[REG775]], align 16
-// CHECK-NEXT: [[REG782:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG775]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG782]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cvtsi64_si128(i64 noundef [[REG783:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG783]], i64* [[REG784:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG785:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG784]], align 8
-// CHECK-NEXT: [[REG786:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG785]], i32 0
-// CHECK-NEXT: [[REG787:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG786]], i64 0, i32 1
-// CHECK-NEXT: store <2 x i64> [[REG787]], <2 x i64>* [[REG788:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG789:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG788]], align 16
-// CHECK-NEXT: ret <2 x i64> [[REG789]]
-
-// CHECK: define available_externally <2 x double> @_mm_cvtsi64x_sd(<2 x double> noundef [[REG790:[0-9a-zA-Z_%.]+]], i64 noundef [[REG791:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG790]], <2 x double>* [[REG792:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i64 [[REG791]], i64* [[REG793:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG794:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG792]], align 16
-// CHECK-NEXT: [[REG795:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG793]], align 8
-// CHECK-NEXT: [[REG796:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_cvtsi64_sd(<2 x double> noundef [[REG794]], i64 noundef [[REG795]])
-// CHECK-NEXT: ret <2 x double> [[REG796]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cvtsi64x_si128(i64 noundef [[REG797:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG797]], i64* [[REG798:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG799:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG798]], align 8
-// CHECK-NEXT: [[REG800:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG799]], i32 0
-// CHECK-NEXT: [[REG801:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG800]], i64 0, i32 1
-// CHECK-NEXT: store <2 x i64> [[REG801]], <2 x i64>* [[REG802:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG803:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG802]], align 16
-// CHECK-NEXT: ret <2 x i64> [[REG803]]
-
-// CHECK: define available_externally <2 x double> @_mm_cvtss_sd(<2 x double> noundef [[REG804:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG805:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG804]], <2 x double>* [[REG806:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG805]], <4 x float>* [[REG807:[0-9a-zA-Z_%.]+]], align 16
-
-// CHECK-BE-NEXT: [[REG808:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG806]], align 16
-// CHECK-BE-NEXT: store <2 x double> [[REG808]], <2 x double>* [[REG809:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-BE-NEXT: [[REG810:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG807]], align 16
-// CHECK-BE-NEXT: [[REG811:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG810]], i32 0
-// CHECK-BE-NEXT: [[REG812:[0-9a-zA-Z_%.]+]] = fpext float [[REG811]] to double
-// CHECK-BE-NEXT: [[REG813:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG809]], align 16
-// CHECK-BE-NEXT: [[REG814:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG813]], double [[REG812]], i32 0
-// CHECK-BE-NEXT: store <2 x double> [[REG814]], <2 x double>* [[REG809]], align 16
-// CHECK-BE-NEXT: [[REG815:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG809]], align 16
-// CHECK-BE-NEXT: ret <2 x double> [[REG815]]
-
-// CHECK-LE-NEXT: [[REG816:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG807]], align 16
-// CHECK-LE-NEXT: [[REG817:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef [[REG816:[0-9a-zA-Z_%.]+]], i32 noundef zeroext 0)
-// CHECK-LE-NEXT: store <4 x float> [[REG817]], <4 x float>* [[REG818:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-LE-NEXT: [[REG819:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG818]], align 16
-// CHECK-LE-NEXT: [[REG820:[0-9a-zA-Z_%.]+]] = call <2 x double> asm "xscvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> [[REG819]])
-// CHECK-LE-NEXT: store <2 x double> [[REG820]], <2 x double>* [[REG809:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-LE-NEXT: [[REG821:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG809]], align 16
-// CHECK-LE-NEXT: [[REG822:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG806]], align 16
-// CHECK-LE-NEXT: [[REG823:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergel(double vector[2], double vector[2])(<2 x double> noundef [[REG821]], <2 x double> noundef [[REG822]])
-// CHECK-LE-NEXT: ret <2 x double> [[REG823]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cvttpd_epi32(<2 x double> noundef [[REG824:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG824]], <2 x double>* [[REG825:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[REG826:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: %[[REG827:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG825]], align 16
-// CHECK-NEXT: [[REG828:[0-9a-zA-Z_%.]+]] = call <4 x i32> asm "xvcvdpsxws ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %[[REG827:[0-9a-zA-Z_%.]+]])
-// CHECK-NEXT: store <4 x i32> [[REG828]], <4 x i32>* [[REG829:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG830:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG829]], align 16
-// CHECK-NEXT: [[REG831:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG829]], align 16
-// CHECK-NEXT: [[REG832:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mergeo(int vector[4], int vector[4])(<4 x i32> noundef [[REG830]], <4 x i32> noundef [[REG831]])
-// CHECK-NEXT: store <4 x i32> [[REG832]], <4 x i32>* [[REG829]], align 16
-// CHECK-NEXT: [[REG833:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG829]], align 16
-// CHECK-NEXT: [[REG834:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG833]] to <2 x i64>
-// CHECK-NEXT: [[REG835:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef [[REG834]], <2 x i64> noundef zeroinitializer)
-// CHECK-NEXT: store <4 x i32> [[REG835]], <4 x i32>* [[REG836:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG837:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG836]], align 16
-// CHECK-NEXT: [[REG838:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG837]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG838]]
-
-// CHECK: define available_externally i64 @_mm_cvttpd_pi32(<2 x double> noundef [[REG839:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG839]], <2 x double>* [[REG840:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG841:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG840]], align 16
-// CHECK-NEXT: [[REG842:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_cvttpd_epi32(<2 x double> noundef [[REG841]])
-// CHECK-NEXT: store <2 x i64> [[REG842]], <2 x i64>* [[REG843:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG844:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG843]], align 16
-// CHECK-NEXT: [[REG845:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG844]], i32 0
-// CHECK-NEXT: ret i64 [[REG845]]
-
-// CHECK: define available_externally <2 x i64> @_mm_cvttps_epi32(<4 x float> noundef [[REG846:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG846]], <4 x float>* [[REG847:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG848:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG847]], align 16
-// CHECK-NEXT: [[REG849:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG848]], i32 0)
-// CHECK-NEXT: store <4 x i32> [[REG849]], <4 x i32>* [[REG850:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG851:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG850]], align 16
-// CHECK-NEXT: [[REG852:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG851]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG852]]
-
-// CHECK: define available_externally signext i32 @_mm_cvttsd_si32(<2 x double> noundef [[REG853:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG853]], <2 x double>* [[REG854:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG855:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG854]], align 16
-// CHECK-NEXT: [[REG856:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG855]], i32 0
-// CHECK-NEXT: [[REG857:[0-9a-zA-Z_%.]+]] = fptosi double [[REG856]] to i32
-// CHECK-NEXT: store i32 [[REG857]], i32* [[REG858:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG859:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG858]], align 4
-// CHECK-NEXT: ret i32 [[REG859]]
-
-// CHECK: define available_externally i64 @_mm_cvttsd_si64(<2 x double> noundef [[REG860:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG860]], <2 x double>* [[REG861:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG862:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG861]], align 16
-// CHECK-NEXT: [[REG863:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG862]], i32 0
-// CHECK-NEXT: [[REG864:[0-9a-zA-Z_%.]+]] = fptosi double [[REG863]] to i64
-// CHECK-NEXT: store i64 [[REG864]], i64* [[REG865:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG866:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG865]], align 8
-// CHECK-NEXT: ret i64 [[REG866]]
-
-// CHECK: define available_externally i64 @_mm_cvttsd_si64x(<2 x double> noundef [[REG867:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG867]], <2 x double>* [[REG868:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG869:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG868]], align 16
-// CHECK-NEXT: [[REG870:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvttsd_si64(<2 x double> noundef [[REG869]])
-// CHECK-NEXT: ret i64 [[REG870]]
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cvtepi32_pd
+// CHECK: call <2 x i64> @vec_unpackh(int vector[4])
+// CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z_.]+}} to <2 x double>
+// CHECK: fmul <2 x double> %[[CONV]], <double 1.000000e+00, double 1.000000e+00>
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtepi32_ps
+// CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtpd_epi32
+// CHECK: call <2 x double> @vec_rint(double vector[2])
+// CHECK: store <4 x i32> zeroinitializer, <4 x i32>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <4 x i32> asm "xvcvdpsxws ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_mergeo(int vector[4], int vector[4])
+// CHECK: call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef zeroinitializer)
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvtpd_pi32
+// CHECK: call <2 x i64> @_mm_cvtpd_epi32(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpd_ps
+// CHECK: store <4 x i32> zeroinitializer, <4 x i32>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <4 x i32> asm "xvcvdpsp ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_mergeo(int vector[4], int vector[4])
+// CHECK: call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef zeroinitializer)
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cvtpi32_pd
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_unpackl(int vector[4])
+// CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z._]+}} to <2 x double>
+// CHECK: fmul <2 x double> %[[CONV]], <double 1.000000e+00, double 1.000000e+00>
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtps_epi32
+// CHECK: call <4 x float> @vec_rint(float vector[4])
+// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cvtps_pd
+// CHECK-BE: call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
+// CHECK-BE: call <2 x double> asm " xvcvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> %{{[0-9a-zA-Z_.]+}})
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+// CHECK-LE: call <2 x double> asm " xvcvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally double @_mm_cvtsd_f64
+// CHECK: extractelement <2 x double> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_cvtsd_si32
+// CHECK: call <2 x double> @vec_rint(double vector[2])
+// CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i32
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvtsd_si64
+// CHECK: call <2 x double> @vec_rint(double vector[2])
+// CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i64
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvtsd_si64x
+// CHECK: call i64 @_mm_cvtsd_si64(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtsd_ss
+// CHECK-BE: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x double> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK-BE: %[[TRUNC:[0-9a-zA-Z_.]+]] = fptrunc double %[[EXT]] to float
+// CHECK-BE: insertelement <4 x float> %{{[0-9a-zA-Z_.]+}}, float %[[TRUNC]], i32 0
+// CHECK-LE: call <2 x double> @vec_splat(double vector[2], unsigned int)(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
+// CHECK-LE: call <4 x float> asm "xscvdpsp ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %{{[0-9a-zA-Z_.]+}})
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_cvtsi128_si32
+// CHECK: extractelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvtsi128_si64
+// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvtsi128_si64x
+// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cvtsi32_sd
+// CHECK: sitofp i32 %{{[0-9a-zA-Z_.]+}} to double
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi32_si128
+// CHECK: call <2 x i64> @_mm_set_epi32(i32 noundef signext 0, i32 noundef signext 0, i32 noundef signext 0, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cvtsi64_sd
+// CHECK: sitofp i64 %{{[0-9a-zA-Z_.]+}} to double
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi64_si128
+// CHECK: %[[INS:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> undef, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: insertelement <2 x i64> %[[INS]], i64 0, i32 1
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cvtsi64x_sd
+// CHECK: call <2 x double> @_mm_cvtsi64_sd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi64x_si128
+// CHECK: %[[INS:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> undef, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: insertelement <2 x i64> %[[INS]], i64 0, i32 1
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_cvtss_sd
+// CHECK-BE: fpext float %{{[0-9a-zA-Z_.]+}} to double
+// CHECK-LE: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK-LE: call <2 x double> asm "xscvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> %{{[0-9a-zA-Z_.]+}})
+// CHECK-LE: call <2 x double> @vec_mergel(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cvttpd_epi32
+// CHECK: call <4 x i32> asm "xvcvdpsxws ${0:x},${1:x}", "=^wa,^wa"
+// CHECK: call <4 x i32> @vec_mergeo(int vector[4], int vector[4])
+// CHECK: call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef zeroinitializer)
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvttpd_pi32
+// CHECK: call <2 x i64> @_mm_cvttpd_epi32(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_cvttps_epi32
+// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_cvttsd_si32
+// CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i32
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvttsd_si64
+// CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i64
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvttsd_si64x
+// CHECK: call i64 @_mm_cvttsd_si64(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
 
 void __attribute__((noinline))
 test_div() {
@@ -1275,27 +556,11 @@ test_div() {
 
 // CHECK-LABEL: @test_div
 
-// CHECK: define available_externally <2 x double> @_mm_div_pd(<2 x double> noundef [[REG871:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG872:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG871]], <2 x double>* [[REG873:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG872]], <2 x double>* [[REG874:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG875:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG873]], align 16
-// CHECK-NEXT: [[REG876:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG874]], align 16
-// CHECK-NEXT: [[REG877:[0-9a-zA-Z_%.]+]] = fdiv <2 x double> [[REG875]], [[REG876]]
-// CHECK-NEXT: ret <2 x double> [[REG877]]
-
-// CHECK: define available_externally <2 x double> @_mm_div_sd(<2 x double> noundef [[REG878:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG879:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG878]], <2 x double>* [[REG880:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG879]], <2 x double>* [[REG881:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG882:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG880]], align 16
-// CHECK-NEXT: [[REG883:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG882]], i32 0
-// CHECK-NEXT: [[REG884:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG881]], align 16
-// CHECK-NEXT: [[REG885:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG884]], i32 0
-// CHECK-NEXT: [[REG886:[0-9a-zA-Z_%.]+]] = fdiv double [[REG883]], [[REG885]]
-// CHECK-NEXT: [[REG887:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG880]], align 16
-// CHECK-NEXT: [[REG888:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG887]], double [[REG886]], i32 0
-// CHECK-NEXT: store <2 x double> [[REG888]], <2 x double>* [[REG880]], align 16
-// CHECK-NEXT: [[REG889:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG880]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG889]]
+// CHECK-LABEL: define available_externally <2 x double> @_mm_div_pd
+// CHECK: fdiv <2 x double>
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_div_sd
+// CHECK: fdiv double
 
 void __attribute__((noinline))
 test_extract() {
@@ -1304,16 +569,10 @@ test_extract() {
 
 // CHECK-LABEL: @test_extract
 
-// CHECK: define available_externally signext i32 @_mm_extract_epi16(<2 x i64> noundef [[REG890:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG891:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG890]], <2 x i64>* [[REG892:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG891]], i32* [[REG893:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG894:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG892]], align 16
-// CHECK-NEXT: [[REG895:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG894]] to <8 x i16>
-// CHECK-NEXT: [[REG896:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG893]], align 4
-// CHECK-NEXT: [[REG897:[0-9a-zA-Z_%.]+]] = and i32 [[REG896]], 7
-// CHECK-NEXT: [[REG898:[0-9a-zA-Z_%.]+]] = extractelement <8 x i16> [[REG895]], i32 [[REG897]]
-// CHECK-NEXT: [[REG899:[0-9a-zA-Z_%.]+]] = zext i16 [[REG898]] to i32
-// CHECK-NEXT: ret i32 [[REG899]]
+// CHECK-LABEL: define available_externally signext i32 @_mm_extract_epi16
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 7
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <8 x i16> %{{[0-9a-zA-Z_.]+}}, i32 %[[AND]]
+// CHECK: zext i16 %[[EXT]] to i32
 
 void __attribute__((noinline))
 test_insert() {
@@ -1322,23 +581,9 @@ test_insert() {
 
 // CHECK-LABEL: @test_insert
 
-// CHECK: define available_externally <2 x i64> @_mm_insert_epi16(<2 x i64> noundef [[REG900:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG901:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG902:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG900]], <2 x i64>* [[REG903:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG901]], i32* [[REG904:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store i32 [[REG902]], i32* [[REG905:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG906:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG903]], align 16
-// CHECK-NEXT: [[REG907:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG906]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG907]], <8 x i16>* [[REG908:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG909:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG904]], align 4
-// CHECK-NEXT: [[REG910:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG909]] to i16
-// CHECK-NEXT: [[REG911:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG905]], align 4
-// CHECK-NEXT: [[REG912:[0-9a-zA-Z_%.]+]] = and i32 [[REG911]], 7
-// CHECK-NEXT: [[REG913:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG908]], align 16
-// CHECK-NEXT: [[REG914:[0-9a-zA-Z_%.]+]] = insertelement <8 x i16> [[REG913]], i16 [[REG910]], i32 [[REG912]]
-// CHECK-NEXT: store <8 x i16> [[REG914]], <8 x i16>* [[REG908]], align 16
-// CHECK-NEXT: [[REG915:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG908]], align 16
-// CHECK-NEXT: [[REG916:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG915]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG916]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_insert_epi16
+// CHECK: trunc i32 %{{[0-9a-zA-Z_.]+}} to i16
+// CHECK: and i32 %{{[0-9a-zA-Z_.]+}}, 7
 
 void __attribute__((noinline))
 test_load() {
@@ -1357,100 +602,51 @@ test_load() {
 
 // CHECK-LABEL: @test_load
 
-// CHECK: define available_externally <2 x double> @_mm_load_pd(double* noundef [[REG917:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG917]], double** [[REG918:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG919:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG918]], align 8
-// CHECK-NEXT: [[REG920:[0-9a-zA-Z_%.]+]] = bitcast double* [[REG919]] to <16 x i8>*
-// CHECK-NEXT: [[REG921:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_ld(long, unsigned char vector[16] const*)(i64 noundef 0, <16 x i8>* noundef [[REG920]])
-// CHECK-NEXT: [[REG922:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG921]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG922]]
-
-// CHECK: define available_externally <2 x double> @_mm_load_pd1(double* noundef [[REG923:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG923]], double** [[REG924:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG925:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG924]], align 8
-// CHECK-NEXT: [[REG926:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_load1_pd(double* noundef [[REG925]])
-// CHECK-NEXT: ret <2 x double> [[REG926]]
-
-// CHECK: define available_externally <2 x double> @_mm_load_sd(double* noundef [[REG927:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG927]], double** [[REG928:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG929:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG928]], align 8
-// CHECK-NEXT: [[REG930:[0-9a-zA-Z_%.]+]] = load double, double* [[REG929]], align 8
-// CHECK-NEXT: [[REG931:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_set_sd(double noundef [[REG930]])
-// CHECK-NEXT: ret <2 x double> [[REG931]]
-
-// CHECK: define available_externally <2 x i64> @_mm_load_si128(<2 x i64>* noundef [[REG932:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64>* [[REG932]], <2 x i64>** [[REG933:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG934:[0-9a-zA-Z_%.]+]] = load <2 x i64>*, <2 x i64>** [[REG933]], align 8
-// CHECK-NEXT: [[REG935:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG934]], align 16
-// CHECK-NEXT: ret <2 x i64> [[REG935]]
-
-// CHECK: define available_externally <2 x double> @_mm_load1_pd(double* noundef [[REG936:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG936]], double** [[REG937:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG938:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG937]], align 8
-// CHECK-NEXT: [[REG939:[0-9a-zA-Z_%.]+]] = load double, double* [[REG938]], align 8
-// CHECK-NEXT: [[REG940:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG939]])
-// CHECK-NEXT: ret <2 x double> [[REG940]]
-
-// CHECK: define available_externally <2 x double> @_mm_loadh_pd(<2 x double> noundef [[REG941:[0-9a-zA-Z_%.]+]], double* noundef [[REG942:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG941]], <2 x double>* [[REG943:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store double* [[REG942]], double** [[REG944:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG945:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG943]], align 16
-// CHECK-NEXT: store <2 x double> [[REG945]], <2 x double>* [[REG946:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG947:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG944]], align 8
-// CHECK-NEXT: [[REG948:[0-9a-zA-Z_%.]+]] = load double, double* [[REG947]], align 8
-// CHECK-NEXT: [[REG949:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG946]], align 16
-// CHECK-NEXT: [[REG950:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG949]], double [[REG948]], i32 1
-// CHECK-NEXT: store <2 x double> [[REG950]], <2 x double>* [[REG946]], align 16
-// CHECK-NEXT: [[REG951:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG946]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG951]]
-
-// CHECK: define available_externally <2 x i64> @_mm_loadl_epi64(<2 x i64>* noundef [[REG952:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64>* [[REG952]], <2 x i64>** [[REG953:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG954:[0-9a-zA-Z_%.]+]] = load <2 x i64>*, <2 x i64>** [[REG953]], align 8
-// CHECK-NEXT: [[REG955:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64>* [[REG954]] to i64*
-// CHECK-NEXT: [[REG956:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG955]], align 8
-// CHECK-NEXT: [[REG957:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef [[REG956]])
-// CHECK-NEXT: ret <2 x i64> [[REG957]]
-
-// CHECK: define available_externally <2 x double> @_mm_loadl_pd(<2 x double> noundef [[REG958:[0-9a-zA-Z_%.]+]], double* noundef [[REG959:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG958]], <2 x double>* [[REG960:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store double* [[REG959]], double** [[REG961:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG962:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG960]], align 16
-// CHECK-NEXT: store <2 x double> [[REG962]], <2 x double>* [[REG963:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG964:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG961]], align 8
-// CHECK-NEXT: [[REG965:[0-9a-zA-Z_%.]+]] = load double, double* [[REG964]], align 8
-// CHECK-NEXT: [[REG966:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG963]], align 16
-// CHECK-NEXT: [[REG967:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG966]], double [[REG965]], i32 0
-// CHECK-NEXT: store <2 x double> [[REG967]], <2 x double>* [[REG963]], align 16
-// CHECK-NEXT: [[REG968:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG963]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG968]]
-
-// CHECK: define available_externally <2 x double> @_mm_loadr_pd(double* noundef [[REG969:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG969]], double** [[REG970:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG971:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG970]], align 8
-// CHECK-NEXT: [[REG972:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_load_pd(double* noundef [[REG971]])
-// CHECK-NEXT: store <2 x double> [[REG972]], <2 x double>* [[REG973:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG974:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG973]], align 16
-// CHECK-NEXT: [[REG975:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG973]], align 16
-// CHECK-NEXT: [[REG976:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG974]] to <2 x i64>
-// CHECK-NEXT: [[REG977:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG975]] to <2 x i64>
-// CHECK-NEXT: [[REG978:[0-9a-zA-Z_%.]+]] = shufflevector <2 x i64> [[REG976]], <2 x i64> [[REG977]], <2 x i32> <i32 1, i32 2>
-// CHECK-NEXT: [[REG979:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG978]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG979]]
-
-// CHECK: define available_externally <2 x double> @_mm_loadu_pd(double* noundef [[REG980:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG980]], double** [[REG981:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG982:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG981]], align 8
-// CHECK-NEXT: [[REG983:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_vsx_ld(int, double const*)(i32 noundef signext 0, double* noundef [[REG982]])
-// CHECK-NEXT: ret <2 x double> [[REG983]]
-
-// CHECK: define available_externally <2 x i64> @_mm_loadu_si128(<2 x i64>* noundef [[REG984:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64>* [[REG984]], <2 x i64>** [[REG985:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG986:[0-9a-zA-Z_%.]+]] = load <2 x i64>*, <2 x i64>** [[REG985]], align 8
-// CHECK-NEXT: [[REG987:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64>* [[REG986]] to i32*
-// CHECK-NEXT: [[REG988:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vsx_ld(int, int const*)(i32 noundef signext 0, i32* noundef [[REG987]])
-// CHECK-NEXT: [[REG989:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG988]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG989]]
+// CHECK-LABEL: define available_externally <2 x double> @_mm_load_pd
+// CHECK: call <16 x i8> @vec_ld(long, unsigned char vector[16] const*)(i64 noundef 0, <16 x i8>* noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_load_pd1
+// CHECK: call <2 x double> @_mm_load1_pd(double* noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_load_sd
+// CHECK: call <2 x double> @_mm_set_sd(double noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_load_si128
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load <2 x i64>*, <2 x i64>** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: load <2 x i64>, <2 x i64>* %[[ADDR]], align 16
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_load1_pd
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load double*, double** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load double, double* %[[ADDR]], align 8
+// CHECK: call <2 x double> @vec_splats(double)(double noundef %[[VAL]])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_loadh_pd
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load double*, double** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load double, double* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = load <2 x double>, <2 x double>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: insertelement <2 x double> %[[VEC]], double %[[VAL]], i32 1
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_loadl_epi64
+// CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_loadl_pd
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load double*, double** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[ADDR2:[0-9a-zA-Z_.]+]] = load double, double* %[[ADDR]], align 8
+// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = load <2 x double>, <2 x double>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: insertelement <2 x double> %[[VEC]], double %[[ADDR2]], i32 0
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_loadr_pd
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load double*, double** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: call <2 x double> @_mm_load_pd(double* noundef %[[ADDR]])
+// CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 1, i32 2>
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_loadu_pd
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load double*, double** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: call <2 x double> @vec_vsx_ld(int, double const*)(i32 noundef signext 0, double* noundef %[[ADDR]])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_loadu_si128
+// CHECK: load <2 x i64>*, <2 x i64>** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: call <4 x i32> @vec_vsx_ld(int, int const*)(i32 noundef signext 0, i32* noundef %{{[0-9a-zA-Z_.]+}})
 
 void __attribute__((noinline))
 test_logical() {
@@ -1466,69 +662,29 @@ test_logical() {
 
 // CHECK-LABEL: @test_logical
 
-// CHECK: define available_externally <2 x double> @_mm_and_pd(<2 x double> noundef [[REG990:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG991:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG990]], <2 x double>* [[REG992:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG991]], <2 x double>* [[REG993:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG994:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG992]], align 16
-// CHECK-NEXT: [[REG995:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG993]], align 16
-// CHECK-NEXT: [[REG996:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_and(double vector[2], double vector[2])(<2 x double> noundef [[REG994]], <2 x double> noundef [[REG995]])
-// CHECK-NEXT: ret <2 x double> [[REG996]]
-
-// CHECK: define available_externally <2 x i64> @_mm_and_si128(<2 x i64> noundef [[REG997:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG998:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG997]], <2 x i64>* [[REG999:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG998]], <2 x i64>* [[REG1000:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1001:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG999]], align 16
-// CHECK-NEXT: [[REG1002:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1000]], align 16
-// CHECK-NEXT: [[REG1003:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_and(long long vector[2], long long vector[2])(<2 x i64> noundef [[REG1001]], <2 x i64> noundef [[REG1002]])
-// CHECK-NEXT: ret <2 x i64> [[REG1003]]
-
-// CHECK: define available_externally <2 x double> @_mm_andnot_pd(<2 x double> noundef [[REG1004:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1005:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG1004]], <2 x double>* [[REG1006:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG1005]], <2 x double>* [[REG1007:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1008:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1007]], align 16
-// CHECK-NEXT: [[REG1009:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1006]], align 16
-// CHECK-NEXT: [[REG1010:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_andc(double vector[2], double vector[2])(<2 x double> noundef [[REG1008]], <2 x double> noundef [[REG1009]])
-// CHECK-NEXT: ret <2 x double> [[REG1010]]
-
-// CHECK: define available_externally <2 x i64> @_mm_andnot_si128(<2 x i64> noundef [[REG1011:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1012:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1011]], <2 x i64>* [[REG1013:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1012]], <2 x i64>* [[REG1014:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1015:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1014]], align 16
-// CHECK-NEXT: [[REG1016:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1013]], align 16
-// CHECK-NEXT: [[REG1017:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_andc(long long vector[2], long long vector[2])(<2 x i64> noundef [[REG1015]], <2 x i64> noundef [[REG1016]])
-// CHECK-NEXT: ret <2 x i64> [[REG1017]]
-
-// CHECK: define available_externally <2 x double> @_mm_xor_pd(<2 x double> noundef [[REG1018:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1019:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG1018]], <2 x double>* [[REG1020:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG1019]], <2 x double>* [[REG1021:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1022:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1020]], align 16
-// CHECK-NEXT: [[REG1023:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1021]], align 16
-// CHECK-NEXT: [[REG1024:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_xor(double vector[2], double vector[2])(<2 x double> noundef [[REG1022]], <2 x double> noundef [[REG1023]])
-// CHECK-NEXT: ret <2 x double> [[REG1024]]
-
-// CHECK: define available_externally <2 x i64> @_mm_xor_si128(<2 x i64> noundef [[REG1025:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1026:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1025]], <2 x i64>* [[REG1027:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1026]], <2 x i64>* [[REG1028:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1029:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1027]], align 16
-// CHECK-NEXT: [[REG1030:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1028]], align 16
-// CHECK-NEXT: [[REG1031:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_xor(long long vector[2], long long vector[2])(<2 x i64> noundef [[REG1029]], <2 x i64> noundef [[REG1030]])
-// CHECK-NEXT: ret <2 x i64> [[REG1031]]
-
-// CHECK: define available_externally <2 x double> @_mm_or_pd(<2 x double> noundef [[REG1032:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1033:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG1032]], <2 x double>* [[REG1034:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG1033]], <2 x double>* [[REG1035:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1036:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1034]], align 16
-// CHECK-NEXT: [[REG1037:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1035]], align 16
-// CHECK-NEXT: [[REG1038:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_or(double vector[2], double vector[2])(<2 x double> noundef [[REG1036]], <2 x double> noundef [[REG1037]])
-// CHECK-NEXT: ret <2 x double> [[REG1038]]
-
-// CHECK: define available_externally <2 x i64> @_mm_or_si128(<2 x i64> noundef [[REG1039:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1040:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1039]], <2 x i64>* [[REG1041:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1040]], <2 x i64>* [[REG1042:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1043:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1041]], align 16
-// CHECK-NEXT: [[REG1044:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1042]], align 16
-// CHECK-NEXT: [[REG1045:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_or(long long vector[2], long long vector[2])(<2 x i64> noundef [[REG1043]], <2 x i64> noundef [[REG1044]])
-// CHECK-NEXT: ret <2 x i64> [[REG1045]]
+// CHECK-LABEL: define available_externally <2 x double> @_mm_and_pd
+// CHECK: call <2 x double> @vec_and(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_and_si128
+// CHECK: call <2 x i64> @vec_and(long long vector[2], long long vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_andnot_pd
+// CHECK: call <2 x double> @vec_andc(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_andnot_si128
+// CHECK: call <2 x i64> @vec_andc(long long vector[2], long long vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_xor_pd
+// CHECK: call <2 x double> @vec_xor(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_xor_si128
+// CHECK: call <2 x i64> @vec_xor(long long vector[2], long long vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_or_pd
+// CHECK: call <2 x double> @vec_or(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_or_si128
+// CHECK: call <2 x i64> @vec_or(long long vector[2], long long vector[2])
 
 void __attribute__((noinline))
 test_max() {
@@ -1540,41 +696,20 @@ test_max() {
 
 // CHECK-LABEL: @test_max
 
-// CHECK: define available_externally <2 x i64> @_mm_max_epi16
-// CHECK: [[REG1046:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_max(short vector[8], short vector[8])
-// CHECK-NEXT: [[REG1047:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1046]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1047]]
-
-// CHECK: define available_externally <2 x i64> @_mm_max_epu8
-// CHECK: [[REG1048:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])
-// CHECK-NEXT: [[REG1049:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG1048]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1049]]
-
-// CHECK: define available_externally <2 x double> @_mm_max_pd
-// CHECK: [[REG1050:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_max(double vector[2], double vector[2])
-// CHECK-NEXT: ret <2 x double> [[REG1050]]
-
-// CHECK: define available_externally <2 x double> @_mm_max_sd(<2 x double> noundef [[REG1051:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1052:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG1051]], <2 x double>* [[REG1053:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG1052]], <2 x double>* [[REG1054:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1055:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1053]], align 16
-// CHECK-NEXT: [[REG1056:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1055]], i32 0
-// CHECK-NEXT: [[REG1057:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG1056]])
-// CHECK-NEXT: store <2 x double> [[REG1057]], <2 x double>* [[REG1058:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1059:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1054]], align 16
-// CHECK-NEXT: [[REG1060:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1059]], i32 0
-// CHECK-NEXT: [[REG1061:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG1060]])
-// CHECK-NEXT: store <2 x double> [[REG1061]], <2 x double>* [[REG1062:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1063:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1058]], align 16
-// CHECK-NEXT: [[REG1064:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1062]], align 16
-// CHECK-NEXT: [[REG1065:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_max(double vector[2], double vector[2])(<2 x double> noundef [[REG1063]], <2 x double> noundef [[REG1064]])
-// CHECK-NEXT: store <2 x double> [[REG1065]], <2 x double>* [[REG1066:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1067:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1066]], align 16
-// CHECK-NEXT: [[REG1068:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1067]], i32 0
-// CHECK-NEXT: [[REG1069:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1053]], align 16
-// CHECK-NEXT: [[REG1070:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1069]], i32 1
-// CHECK-NEXT: [[REG1071:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG1068]], double noundef [[REG1070]])
-// CHECK-NEXT: ret <2 x double> [[REG1071]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_max_epi16
+// CHECK: call <8 x i16> @vec_max(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_max_epu8
+// CHECK: call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_max_pd
+// CHECK: call <2 x double> @vec_max(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_max_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_max(double vector[2], double vector[2])
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
 
 void __attribute__((noinline))
 test_min() {
@@ -1586,41 +721,20 @@ test_min() {
 
 // CHECK-LABEL: @test_min
 
-// CHECK: define available_externally <2 x i64> @_mm_min_epi16
-// CHECK: [[REG1072:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_min(short vector[8], short vector[8])
-// CHECK-NEXT: [[REG1073:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1072]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1073]]
-
-// CHECK: define available_externally <2 x i64> @_mm_min_epu8
-// CHECK: [[REG1074:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])
-// CHECK-NEXT: [[REG1075:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG1074]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1075]]
-
-// CHECK: define available_externally <2 x double> @_mm_min_pd
-// CHECK: [[REG1076:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_min(double vector[2], double vector[2])
-// CHECK-NEXT: ret <2 x double> [[REG1076]]
-
-// CHECK: define available_externally <2 x double> @_mm_min_sd(<2 x double> noundef [[REG1077:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1078:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG1077]], <2 x double>* [[REG1079:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG1078]], <2 x double>* [[REG1080:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1081:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1079]], align 16
-// CHECK-NEXT: [[REG1082:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1081]], i32 0
-// CHECK-NEXT: [[REG1083:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG1082]])
-// CHECK-NEXT: store <2 x double> [[REG1083]], <2 x double>* [[REG1084:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1085:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1080]], align 16
-// CHECK-NEXT: [[REG1086:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1085]], i32 0
-// CHECK-NEXT: [[REG1087:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splats(double)(double noundef [[REG1086]])
-// CHECK-NEXT: store <2 x double> [[REG1087]], <2 x double>* [[REG1088:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1089:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1084]], align 16
-// CHECK-NEXT: [[REG1090:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1088]], align 16
-// CHECK-NEXT: [[REG1091:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_min(double vector[2], double vector[2])(<2 x double> noundef [[REG1089]], <2 x double> noundef [[REG1090]])
-// CHECK-NEXT: store <2 x double> [[REG1091]], <2 x double>* [[REG1092:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1093:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1092]], align 16
-// CHECK-NEXT: [[REG1094:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1093]], i32 0
-// CHECK-NEXT: [[REG1095:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1079]], align 16
-// CHECK-NEXT: [[REG1096:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1095]], i32 1
-// CHECK-NEXT: [[REG1097:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG1094]], double noundef [[REG1096]])
-// CHECK-NEXT: ret <2 x double> [[REG1097]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_min_epi16
+// CHECK: call <8 x i16> @vec_min(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_min_epu8
+// CHECK: call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_min_pd
+// CHECK: call <2 x double> @vec_min(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_min_sd
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_splats(double)
+// CHECK: call <2 x double> @vec_min(double vector[2], double vector[2])
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
 
 void __attribute__((noinline))
 test_move() {
@@ -1635,92 +749,36 @@ test_move() {
 
 // CHECK-LABEL: @test_move
 
-// CHECK: define available_externally <2 x i64> @_mm_move_epi64(<2 x i64> noundef [[REG1098:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1098]], <2 x i64>* [[REG1099:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1100:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1099]], align 16
-// CHECK-NEXT: [[REG1101:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG1100]], i32 0
-// CHECK-NEXT: [[REG1102:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef [[REG1101]])
-// CHECK-NEXT: ret <2 x i64> [[REG1102]]
-
-// CHECK: define available_externally <2 x double> @_mm_move_sd(<2 x double> noundef [[REG1103:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1104:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG1103]], <2 x double>* [[REG1105:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG1104]], <2 x double>* [[REG1106:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1107:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1105]], align 16
-// CHECK-NEXT: store <2 x double> [[REG1107]], <2 x double>* [[REG1108:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1109:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1106]], align 16
-// CHECK-NEXT: [[REG1110:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1109]], i32 0
-// CHECK-NEXT: [[REG1111:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1108]], align 16
-// CHECK-NEXT: [[REG1112:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG1111]], double [[REG1110]], i32 0
-// CHECK-NEXT: store <2 x double> [[REG1112]], <2 x double>* [[REG1108]], align 16
-// CHECK-NEXT: [[REG1113:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1108]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG1113]]
-
-// CHECK: define available_externally signext i32 @_mm_movemask_epi8(<2 x i64> noundef [[REG1114:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1114]], <2 x i64>* [[REG1115:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1116:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1115]], align 16
-// CHECK-NEXT: [[REG1117:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1116]] to <16 x i8>
-// CHECK-NEXT: [[REG1118:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG1117]], <16 x i8> noundef <i8 120, i8 112, i8 104, i8 96, i8 88, i8 80, i8 72, i8 64, i8 56, i8 48, i8 40, i8 32, i8 24, i8 16, i8 8, i8 0>)
-// CHECK-NEXT: store <2 x i64> [[REG1118]], <2 x i64>* [[REG1119:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1120:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1119]], align 16
-// CHECK-LE-NEXT: [[REG1121:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG1120]], i32 1
-// CHECK-BE-NEXT: [[REG1121:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG1120]], i32 0
-// CHECK-NEXT: [[REG1122:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG1121]] to i32
-// CHECK-NEXT: ret i32 [[REG1122]]
-
-// CHECK: define available_externally signext i32 @_mm_movemask_pd(<2 x double> noundef [[REG1123:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG1123]], <2 x double>* [[REG1124:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1125:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1124]], align 16
-// CHECK-NEXT: [[REG1126:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG1125]] to <16 x i8>
-// CHECK-LE-NEXT: [[REG1127:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG1126]], <16 x i8> noundef bitcast (<4 x i32> <i32 -2139094976, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>))
-// CHECK-BE-NEXT: [[REG1127:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG1126]], <16 x i8> noundef bitcast (<4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 -2139078656> to <16 x i8>))
-// CHECK-NEXT: store <2 x i64> [[REG1127]], <2 x i64>* [[REG1128:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1129:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1128]], align 16
-// CHECK-LE-NEXT: [[REG1130:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG1129]], i32 1
-// CHECK-BE-NEXT: [[REG1130:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG1129]], i32 0
-// CHECK-NEXT: [[REG1131:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG1130]] to i32
-// CHECK-NEXT: ret i32 [[REG1131]]
-
-// CHECK: define available_externally i64 @_mm_movepi64_pi64(<2 x i64> noundef [[REG1132:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1132]], <2 x i64>* [[REG1133:[0-9a-zA-Z_%.]+]], align 1
-// CHECK-NEXT: [[REG1134:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1133]], align 1
-// CHECK-NEXT: [[REG1135:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG1134]], i32 0
-// CHECK-NEXT: ret i64 [[REG1135]]
-
-// CHECK: define available_externally <2 x i64> @_mm_movpi64_epi64(i64 noundef [[REG1136:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG1136]], i64* [[REG1137:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1138:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1137]], align 8
-// CHECK-NEXT: [[REG1139:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef [[REG1138]])
-// CHECK-NEXT: ret <2 x i64> [[REG1139]]
-
-// CHECK: define available_externally void @_mm_maskmoveu_si128(<2 x i64> noundef [[REG1140:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1141:[0-9a-zA-Z_%.]+]], i8* noundef [[REG1142:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1140]], <2 x i64>* [[REG1143:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1141]], <2 x i64>* [[REG1144:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i8* [[REG1142]], i8** [[REG1145:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x i64> <i64 9187201950435737471, i64 9187201950435737471>, <2 x i64>* [[REG1146:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1147:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG1145]], align 8
-// CHECK-NEXT: [[REG1148:[0-9a-zA-Z_%.]+]] = bitcast i8* [[REG1147]] to <2 x i64>*
-// CHECK-NEXT: store <2 x i64>* [[REG1148]], <2 x i64>** [[REG1149:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1150:[0-9a-zA-Z_%.]+]] = load <2 x i64>*, <2 x i64>** [[REG1149]], align 8
-// CHECK-NEXT: [[REG1151:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_loadu_si128(<2 x i64>* noundef [[REG1150]])
-// CHECK-NEXT: [[REG1152:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1151]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG1152]], <16 x i8>* [[REG1153:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1154:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1144]], align 16
-// CHECK-NEXT: [[REG1155:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1154]] to <16 x i8>
-// CHECK-NEXT: [[REG1156:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1146]], align 16
-// CHECK-NEXT: [[REG1157:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1156]] to <16 x i8>
-// CHECK-NEXT: [[REG1158:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG1155]], <16 x i8> noundef [[REG1157]])
-// CHECK-NEXT: store <16 x i8> [[REG1158]], <16 x i8>* [[REG1159:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1160:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG1153]], align 16
-// CHECK-NEXT: [[REG1161:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1143]], align 16
-// CHECK-NEXT: [[REG1162:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1161]] to <16 x i8>
-// CHECK-NEXT: [[REG1163:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG1159]], align 16
-// CHECK-NEXT: [[REG1164:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG1160]], <16 x i8> noundef [[REG1162]], <16 x i8> noundef [[REG1163]])
-// CHECK-NEXT: store <16 x i8> [[REG1164]], <16 x i8>* [[REG1153]], align 16
-// CHECK-NEXT: [[REG1165:[0-9a-zA-Z_%.]+]] = load <2 x i64>*, <2 x i64>** [[REG1149]], align 8
-// CHECK-NEXT: [[REG1166:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG1153]], align 16
-// CHECK-NEXT: [[REG1167:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG1166]] to <2 x i64>
-// CHECK-NEXT: call void @_mm_storeu_si128(<2 x i64>* noundef [[REG1165]], <2 x i64> noundef [[REG1167]])
-// CHECK-NEXT: ret void
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_move_epi64
+// CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_move_sd
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x double> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: insertelement <2 x double> %{{[0-9a-zA-Z_.]+}}, double %[[EXT]], i32 0
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_movemask_epi8
+// CHECK: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 120, i8 112, i8 104, i8 96, i8 88, i8 80, i8 72, i8 64, i8 56, i8 48, i8 40, i8 32, i8 24, i8 16, i8 8, i8 0>)
+// CHECK-LE: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
+// CHECK-BE: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: trunc i64 %[[VAL]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_movemask_pd
+// CHECK-LE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 -2139094976, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>))
+// CHECK-LE: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
+// CHECK-BE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 -2139078656> to <16 x i8>))
+// CHECK-BE: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally i64 @_mm_movepi64_pi64
+// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_movpi64_epi64
+// CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally void @_mm_maskmoveu_si128
+// CHECK: call <2 x i64> @_mm_loadu_si128(<2 x i64>* noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16])
+// CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], unsigned char vector[16])
+// CHECK: call void @_mm_storeu_si128(<2 x i64>* noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef %{{[0-9a-zA-Z_.]+}})
 
 void __attribute__((noinline))
 test_mul() {
@@ -1735,113 +793,39 @@ test_mul() {
 
 // CHECK-LABEL: @test_mul
 
-// CHECK: define available_externally <2 x i64> @_mm_mul_epu32(<2 x i64> noundef [[REG1168:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1169:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1168]], <2 x i64>* [[REG1170:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1169]], <2 x i64>* [[REG1171:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1172:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1170]], align 16
-// CHECK-NEXT: [[REG1173:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1171]], align 16
-// CHECK-LE-NEXT: [[REG1174:[0-9a-zA-Z_%.]+]] = call <2 x i64> asm "vmulouw $0,$1,$2", "=v,v,v"(<2 x i64> [[REG1172]], <2 x i64> [[REG1173]])
-// CHECK-BE-NEXT: [[REG1174:[0-9a-zA-Z_%.]+]] = call <2 x i64> asm "vmuleuw $0,$1,$2", "=v,v,v"(<2 x i64> [[REG1172]], <2 x i64> [[REG1173]])
-// CHECK-NEXT: store <2 x i64> [[REG1174]], <2 x i64>* [[REG1175:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1176:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1175]], align 16
-// CHECK-NEXT: ret <2 x i64> [[REG1176]]
-
-// CHECK: define available_externally <2 x double> @_mm_mul_pd(<2 x double> noundef [[REG1177:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1178:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG1177]], <2 x double>* [[REG1179:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG1178]], <2 x double>* [[REG1180:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1181:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1179]], align 16
-// CHECK-NEXT: [[REG1182:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1180]], align 16
-// CHECK-NEXT: [[REG1183:[0-9a-zA-Z_%.]+]] = fmul <2 x double> [[REG1181]], [[REG1182]]
-// CHECK-NEXT: ret <2 x double> [[REG1183]]
-
-// CHECK: define available_externally <2 x double> @_mm_mul_sd(<2 x double> noundef [[REG1184:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1185:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG1184]], <2 x double>* [[REG1186:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG1185]], <2 x double>* [[REG1187:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1188:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1186]], align 16
-// CHECK-NEXT: [[REG1189:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1188]], i32 0
-// CHECK-NEXT: [[REG1190:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1187]], align 16
-// CHECK-NEXT: [[REG1191:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1190]], i32 0
-// CHECK-NEXT: [[REG1192:[0-9a-zA-Z_%.]+]] = fmul double [[REG1189]], [[REG1191]]
-// CHECK-NEXT: [[REG1193:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1186]], align 16
-// CHECK-NEXT: [[REG1194:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG1193]], double [[REG1192]], i32 0
-// CHECK-NEXT: store <2 x double> [[REG1194]], <2 x double>* [[REG1186]], align 16
-// CHECK-NEXT: [[REG1195:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1186]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG1195]]
-
-// CHECK: define available_externally i64 @_mm_mul_su32(i64 noundef [[REG1196:[0-9a-zA-Z_%.]+]], i64 noundef [[REG1197:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG1196]], i64* [[REG1198:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG1197]], i64* [[REG1199:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1200:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1198]], align 8
-// CHECK-NEXT: [[REG1201:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG1200]] to i32
-// CHECK-NEXT: store i32 [[REG1201]], i32* [[REG1202:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG1203:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1199]], align 8
-// CHECK-NEXT: [[REG1204:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG1203]] to i32
-// CHECK-NEXT: store i32 [[REG1204]], i32* [[REG1205:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG1206:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1202]], align 4
-// CHECK-NEXT: [[REG1207:[0-9a-zA-Z_%.]+]] = zext i32 [[REG1206]] to i64
-// CHECK-NEXT: [[REG1208:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1205]], align 4
-// CHECK-NEXT: [[REG1209:[0-9a-zA-Z_%.]+]] = zext i32 [[REG1208]] to i64
-// CHECK-NEXT: [[REG1210:[0-9a-zA-Z_%.]+]] = mul i64 [[REG1207]], [[REG1209]]
-// CHECK-NEXT: ret i64 [[REG1210]]
-
-// CHECK: define available_externally <2 x i64> @_mm_mulhi_epi16(<2 x i64> noundef [[REG1211:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1212:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1211]], <2 x i64>* [[REG1213:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1212]], <2 x i64>* [[REG1214:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-LE-NEXT: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* [[REG1215:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-BE-NEXT: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 8, i8 9, i8 24, i8 25, i8 12, i8 13, i8 28, i8 29>, <16 x i8>* [[REG1215:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1216:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1213]], align 16
-// CHECK-NEXT: [[REG1217:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1216]] to <8 x i16>
-// CHECK-NEXT: [[REG1218:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1214]], align 16
-// CHECK-NEXT: [[REG1219:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1218]] to <8 x i16>
-// CHECK-NEXT: [[REG1220:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulesh(<8 x i16> noundef [[REG1217]], <8 x i16> noundef [[REG1219]])
-// CHECK-NEXT: store <4 x i32> [[REG1220]], <4 x i32>* [[REG1221:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1222:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1213]], align 16
-// CHECK-NEXT: [[REG1223:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1222]] to <8 x i16>
-// CHECK-NEXT: [[REG1224:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1214]], align 16
-// CHECK-NEXT: [[REG1225:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1224]] to <8 x i16>
-// CHECK-NEXT: [[REG1226:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulosh(<8 x i16> noundef [[REG1223]], <8 x i16> noundef [[REG1225]])
-// CHECK-NEXT: store <4 x i32> [[REG1226]], <4 x i32>* [[REG1227:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1228:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1221]], align 16
-// CHECK-NEXT: [[REG1229:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1227]], align 16
-// CHECK-NEXT: [[REG1230:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG1215]], align 16
-// CHECK-NEXT: [[REG1231:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef [[REG1228]], <4 x i32> noundef [[REG1229]], <16 x i8> noundef [[REG1230]])
-// CHECK-NEXT: [[REG1232:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1231]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1232]]
-
-// CHECK: define available_externally <2 x i64> @_mm_mulhi_epu16(<2 x i64> noundef [[REG1233:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1234:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1233]], <2 x i64>* [[REG1235:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1234]], <2 x i64>* [[REG1236:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-LE-NEXT: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* [[REG1237:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-BE-NEXT: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 8, i8 9, i8 24, i8 25, i8 12, i8 13, i8 28, i8 29>, <16 x i8>* [[REG1237:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1238:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1235]], align 16
-// CHECK-NEXT: [[REG1239:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1238]] to <8 x i16>
-// CHECK-NEXT: [[REG1240:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1236]], align 16
-// CHECK-NEXT: [[REG1241:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1240]] to <8 x i16>
-// CHECK-NEXT: [[REG1242:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmuleuh(<8 x i16> noundef [[REG1239]], <8 x i16> noundef [[REG1241]])
-// CHECK-NEXT: store <4 x i32> [[REG1242]], <4 x i32>* [[REG1243:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1244:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1235]], align 16
-// CHECK-NEXT: [[REG1245:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1244]] to <8 x i16>
-// CHECK-NEXT: [[REG1246:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1236]], align 16
-// CHECK-NEXT: [[REG1247:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1246]] to <8 x i16>
-// CHECK-NEXT: [[REG1248:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulouh(<8 x i16> noundef [[REG1245]], <8 x i16> noundef [[REG1247]])
-// CHECK-NEXT: store <4 x i32> [[REG1248]], <4 x i32>* [[REG1249:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1250:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1243]], align 16
-// CHECK-NEXT: [[REG1251:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1249]], align 16
-// CHECK-NEXT: [[REG1252:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG1237]], align 16
-// CHECK-NEXT: [[REG1253:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16])(<4 x i32> noundef [[REG1250]], <4 x i32> noundef [[REG1251]], <16 x i8> noundef [[REG1252]])
-// CHECK-NEXT: [[REG1254:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1253]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1254]]
-
-// CHECK: define available_externally <2 x i64> @_mm_mullo_epi16(<2 x i64> noundef [[REG1255:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1256:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1255]], <2 x i64>* [[REG1257:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1256]], <2 x i64>* [[REG1258:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1259:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1257]], align 16
-// CHECK-NEXT: [[REG1260:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1259]] to <8 x i16>
-// CHECK-NEXT: [[REG1261:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1258]], align 16
-// CHECK-NEXT: [[REG1262:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1261]] to <8 x i16>
-// CHECK-NEXT: [[REG1263:[0-9a-zA-Z_%.]+]] = mul <8 x i16> [[REG1260]], [[REG1262]]
-// CHECK-NEXT: [[REG1264:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1263]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1264]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_mul_epu32
+// CHECK-LE: call <2 x i64> asm "vmulouw $0,$1,$2", "=v,v,v"(<2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}})
+// CHECK-BE: call <2 x i64> asm "vmuleuw $0,$1,$2", "=v,v,v"(<2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_mul_pd
+// CHECK: fmul <2 x double>
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_mul_sd
+// CHECK: fmul double
+
+// CHECK-LABEL: define available_externally i64 @_mm_mul_su32
+// CHECK: trunc i64 %{{[0-9a-zA-Z_.]+}} to i32
+// CHECK: trunc i64 %{{[0-9a-zA-Z_.]+}} to i32
+// CHECK: %[[EXT1:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: %[[EXT2:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: mul i64 %[[EXT1]], %[[EXT2]]
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_mulhi_epi16
+// CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 8, i8 9, i8 24, i8 25, i8 12, i8 13, i8 28, i8 29>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <4 x i32> @vec_vmulesh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_vmulosh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_mulhi_epu16
+// CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 8, i8 9, i8 24, i8 25, i8 12, i8 13, i8 28, i8 29>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <4 x i32> @vec_vmuleuh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_vmulouh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_mullo_epi16
+// CHECK: mul <8 x i16>
 
 void __attribute__((noinline))
 test_pack() {
@@ -1852,20 +836,14 @@ test_pack() {
 
 // CHECK-LABEL: @test_pack
 
-// CHECK: define available_externally <2 x i64> @_mm_packs_epi16
-// CHECK: [[REG1265:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_packs(short vector[8], short vector[8])
-// CHECK-NEXT: [[REG1266:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG1265]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1266]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_packs_epi16
+// CHECK: call <16 x i8> @vec_packs(short vector[8], short vector[8])
 
-// CHECK: define available_externally <2 x i64> @_mm_packs_epi32
-// CHECK: [[REG1267:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_packs(int vector[4], int vector[4])
-// CHECK-NEXT: [[REG1268:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1267]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1268]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_packs_epi32
+// CHECK: call <8 x i16> @vec_packs(int vector[4], int vector[4])
 
-// CHECK: define available_externally <2 x i64> @_mm_packus_epi16
-// CHECK: [[REG1269:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_packsu(short vector[8], short vector[8])
-// CHECK-NEXT: [[REG1270:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG1269]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1270]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_packus_epi16
+// CHECK: call <16 x i8> @vec_packsu(short vector[8], short vector[8])
 
 void __attribute__((noinline))
 test_sad() {
@@ -1874,18 +852,14 @@ test_sad() {
 
 // CHECK-LABEL: @test_sad
 
-// CHECK: define available_externally <2 x i64> @_mm_sad_epu8
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_sad_epu8
 // CHECK: call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])
 // CHECK: call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])
 // CHECK: call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16])
-// CHECK: call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef zeroinitializer)
-// CHECK: call <4 x i32> @vec_sum2s(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef zeroinitializer)
-// CHECK-LE: [[REG1271:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sld(int vector[4], int vector[4], unsigned int)(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef zeroext 4)
-// CHECK-BE: [[REG1271:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sld(int vector[4], int vector[4], unsigned int)(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef zeroext 6)
-// CHECK-NEXT: store <4 x i32> [[REG1271]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1272:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1273:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1272]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1273]]
+// CHECK: call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
+// CHECK: call <4 x i32> @vec_sum2s(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
+// CHECK-LE: call <4 x i32> @vec_sld(int vector[4], int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 4)
+// CHECK-BE: call <4 x i32> @vec_sld(int vector[4], int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 6)
 
 void __attribute__((noinline))
 test_set() {
@@ -1914,159 +888,91 @@ test_set() {
 
 // CHECK-LABEL: @test_set
 
-// CHECK: define available_externally <2 x i64> @_mm_set_epi16
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi16
 // CHECK-COUNT-8: store i16 {{[0-9a-zA-Z_%.]+}}, i16* {{[0-9a-zA-Z_%.]+}}, align 2
 // CHECK: insertelement <8 x i16> undef, i16 {{[0-9a-zA-Z_%.]+}}, i32 0
 // CHECK-COUNT-7: insertelement <8 x i16> {{[0-9a-zA-Z_%.]+}}, i16 {{[0-9a-zA-Z_%.]+}}, i32 {{[1-7]}}
 
-// CHECK: define available_externally <2 x i64> @_mm_set_epi32
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi32
 // CHECK-COUNT-4: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
 // CHECK: insertelement <4 x i32> undef, i32 {{[0-9a-zA-Z_%.]+}}, i32 0
 // CHECK-COUNT-3: insertelement <4 x i32> {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9a-zA-Z_%.]+}}, i32 {{[1-3]}}
 
-// CHECK: define available_externally <2 x i64> @_mm_set_epi64(i64 noundef [[REG1274:[0-9a-zA-Z_%.]+]], i64 noundef [[REG1275:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG1274]], i64* [[REG1276:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG1275]], i64* [[REG1277:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1278:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1276]], align 8
-// CHECK-NEXT: [[REG1279:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1277]], align 8
-// CHECK-NEXT: [[REG1280:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi64x(i64 noundef [[REG1278]], i64 noundef [[REG1279]])
-// CHECK-NEXT: ret <2 x i64> [[REG1280]]
-
-// CHECK: define available_externally <2 x i64> @_mm_set_epi64x(i64 noundef [[REG1281:[0-9a-zA-Z_%.]+]], i64 noundef [[REG1282:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG1281]], i64* [[REG1283:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG1282]], i64* [[REG1284:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1285:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1284]], align 8
-// CHECK-NEXT: [[REG1286:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG1285]], i32 0
-// CHECK-NEXT: [[REG1287:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1283]], align 8
-// CHECK-NEXT: [[REG1288:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG1286]], i64 [[REG1287]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG1288]], <2 x i64>* [[REG1289:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1290:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1289]], align 16
-// CHECK-NEXT: ret <2 x i64> [[REG1290]]
-
-// CHECK: define available_externally <2 x i64> @_mm_set_epi8
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi64
+// CHECK: call <2 x i64> @_mm_set_epi64x(i64 noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi64x
+// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> undef, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: insertelement <2 x i64> %[[VEC]], i64 %{{[0-9a-zA-Z_.]+}}, i32 1
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi8
 // CHECK-COUNT-16: store i8 {{[0-9a-zA-Z_%.]+}}, i8* {{[0-9a-zA-Z_%.]+}}, align 1
 // CHECK: insertelement <16 x i8> undef, i8 {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9]+}}
 // CHECK-COUNT-15: {{[0-9a-zA-Z_%.]+}} = insertelement <16 x i8> {{[0-9a-zA-Z_%.]+}}, i8 {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9]+}}
-// CHECK: store <16 x i8> {{[0-9a-zA-Z_%.]+}}, <16 x i8>* [[REG1291:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1292:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG1291]], align 16
-// CHECK-NEXT: [[REG1293:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG1292]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1293]]
-
-// CHECK: define available_externally <2 x double> @_mm_set_pd(double noundef [[REG1294:[0-9a-zA-Z_%.]+]], double noundef [[REG1295:[0-9a-zA-Z_%.]+]])
-// CHECK: store double [[REG1294]], double* [[REG1296:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store double [[REG1295]], double* [[REG1297:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1298:[0-9a-zA-Z_%.]+]] = load double, double* [[REG1297]], align 8
-// CHECK-NEXT: [[REG1299:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> undef, double [[REG1298]], i32 0
-// CHECK-NEXT: [[REG1300:[0-9a-zA-Z_%.]+]] = load double, double* [[REG1296]], align 8
-// CHECK-NEXT: [[REG1301:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG1299]], double [[REG1300]], i32 1
-// CHECK-NEXT: store <2 x double> [[REG1301]], <2 x double>* [[REG1302:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1303:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1302]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG1303]]
-
-// CHECK: define available_externally <2 x double> @_mm_set_pd1(double noundef [[REG1304:[0-9a-zA-Z_%.]+]])
-// CHECK: store double [[REG1304]], double* [[REG1305:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1306:[0-9a-zA-Z_%.]+]] = load double, double* [[REG1305]], align 8
-// CHECK-NEXT: [[REG1307:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_set1_pd(double noundef [[REG1306]])
-// CHECK-NEXT: ret <2 x double> [[REG1307]]
-
-// CHECK: define available_externally <2 x double> @_mm_set_sd(double noundef [[REG1308:[0-9a-zA-Z_%.]+]])
-// CHECK: store double [[REG1308]], double* [[REG1309:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1310:[0-9a-zA-Z_%.]+]] = load double, double* [[REG1309]], align 8
-// CHECK-NEXT: [[REG1311:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> undef, double [[REG1310]], i32 0
-// CHECK-NEXT: [[REG1312:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG1311]], double 0.000000e+00, i32 1
-// CHECK-NEXT: store <2 x double> [[REG1312]], <2 x double>* [[REG1313:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1314:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1313]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG1314]]
-
-// CHECK: define available_externally <2 x i64> @_mm_set1_epi16(i16 noundef signext [[REG1315:[0-9a-zA-Z_%.]+]])
-// CHECK: store i16 [[REG1315]], i16* [[REG1316:[0-9a-zA-Z_%.]+]], align 2
-// CHECK-COUNT-8: load i16, i16* [[REG1316]], align 2
-// CHECK-NEXT: [[REG1317:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi16
-// CHECK-NEXT: ret <2 x i64> [[REG1317]]
-
-// CHECK: define available_externally <2 x i64> @_mm_set1_epi32(i32 noundef signext [[REG1318:[0-9a-zA-Z_%.]+]])
-// CHECK: store i32 [[REG1318]], i32* [[REG1319:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-COUNT-4: load i32, i32* [[REG1319]], align 4
-// CHECK-NEXT: [[REG1320:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi32
-// CHECK-NEXT: ret <2 x i64> [[REG1320]]
-
-// CHECK: define available_externally <2 x i64> @_mm_set1_epi64(i64 noundef [[REG1321:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG1321]], i64* [[REG1322:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1323:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1322]], align 8
-// CHECK-NEXT: [[REG1324:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1322]], align 8
-// CHECK-NEXT: [[REG1325:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi64(i64 noundef [[REG1323]], i64 noundef [[REG1324]])
-// CHECK-NEXT: ret <2 x i64> [[REG1325]]
-
-// CHECK: define available_externally <2 x i64> @_mm_set1_epi64x(i64 noundef [[REG1326:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG1326]], i64* [[REG1327:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1328:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1327]], align 8
-// CHECK-NEXT: [[REG1329:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1327]], align 8
-// CHECK-NEXT: [[REG1330:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi64x(i64 noundef [[REG1328]], i64 noundef [[REG1329]])
-// CHECK-NEXT: ret <2 x i64> [[REG1330]]
-
-// CHECK: define available_externally <2 x i64> @_mm_set1_epi8(i8 noundef zeroext [[REG1331:[0-9a-zA-Z_%.]+]])
-// CHECK: store i8 [[REG1331]], i8* [[REG1332:[0-9a-zA-Z_%.]+]], align 1
-// CHECK-COUNT-16: load i8, i8* [[REG1332]], align 1
-// CHECK: [[REG1333:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi8
-// CHECK-NEXT: ret <2 x i64> [[REG1333]]
-
-// CHECK: define available_externally <2 x double> @_mm_set1_pd(double noundef [[REG1334:[0-9a-zA-Z_%.]+]])
-// CHECK: store double [[REG1334]], double* [[REG1335:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1336:[0-9a-zA-Z_%.]+]] = load double, double* [[REG1335]], align 8
-// CHECK-NEXT: [[REG1337:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> undef, double [[REG1336]], i32 0
-// CHECK-NEXT: [[REG1338:[0-9a-zA-Z_%.]+]] = load double, double* [[REG1335]], align 8
-// CHECK-NEXT: [[REG1339:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG1337]], double [[REG1338]], i32 1
-// CHECK-NEXT: store <2 x double> [[REG1339]], <2 x double>* [[REG1340:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1341:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1340]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG1341]]
-
-// CHECK: define available_externally <2 x i64> @_mm_setr_epi16(i16 noundef signext [[REG1342:[0-9a-zA-Z_%.]+]], i16 noundef signext [[REG1343:[0-9a-zA-Z_%.]+]], i16 noundef signext [[REG1344:[0-9a-zA-Z_%.]+]], i16 noundef signext [[REG1345:[0-9a-zA-Z_%.]+]], i16 noundef signext [[REG1346:[0-9a-zA-Z_%.]+]], i16 noundef signext [[REG1347:[0-9a-zA-Z_%.]+]], i16 noundef signext [[REG1348:[0-9a-zA-Z_%.]+]], i16 noundef signext [[REG1349:[0-9a-zA-Z_%.]+]])
-// CHECK-COUNT-8: store i16 {{[0-9a-zA-Z_%.]+}}, i16* {{[0-9a-zA-Z_%.]+}}, align 2
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_set_pd
+// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_set_pd1
+// CHECK: call <2 x double> @_mm_set1_pd(double noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_set_sd
+// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: insertelement <2 x double> %[[VEC]], double 0.000000e+00, i32 1
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi16
+// CHECK-COUNT-8: load i16, i16* %{{[0-9a-zA-Z_.]+}}, align 2
+// CHECK: call <2 x i64> @_mm_set_epi16
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi32
+// CHECK-COUNT-4: load i32, i32* %{{[0-9a-zA-Z_.]+}}, align 4
+// CHECK: call <2 x i64> @_mm_set_epi32
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi64
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = load i64, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = load i64, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef %[[VAL1]], i64 noundef %[[VAL2]])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi64x
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = load i64, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = load i64, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: call <2 x i64> @_mm_set_epi64x(i64 noundef %[[VAL1]], i64 noundef %[[VAL2]])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi8
+// CHECK-COUNT-16: load i8, i8* %{{[0-9a-zA-Z_.]+}}, align 1
+// CHECK: call <2 x i64> @_mm_set_epi8
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_set1_pd
+// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi16
 // CHECK-COUNT-8: load i16, i16* {{[0-9a-zA-Z_%.]+}}, align 2
-// CHECK-NEXT: [[REG1350:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi16
-// CHECK-NEXT: ret <2 x i64> [[REG1350]]
+// CHECK: call <2 x i64> @_mm_set_epi16
 
-// CHECK: define available_externally <2 x i64> @_mm_setr_epi32
-// CHECK-COUNT-4: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi32
 // CHECK-COUNT-4: load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG1351:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi32
-// CHECK-NEXT: ret <2 x i64> [[REG1351]]
-
-// CHECK: define available_externally <2 x i64> @_mm_setr_epi64(i64 noundef [[REG1352:[0-9a-zA-Z_%.]+]], i64 noundef [[REG1353:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG1352]], i64* [[REG1354:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG1353]], i64* [[REG1355:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1356:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1355]], align 8
-// CHECK-NEXT: [[REG1357:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1354]], align 8
-// CHECK-NEXT: [[REG1358:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi64(i64 noundef [[REG1356]], i64 noundef [[REG1357]])
-// CHECK-NEXT: ret <2 x i64> [[REG1358]]
-
-// CHECK: define available_externally <2 x i64> @_mm_setr_epi8
-// CHECK-COUNT-16: store i8 {{[0-9a-zA-Z_%.]+}}, i8* {{[0-9a-zA-Z_%.]+}}, align 1
+// CHECK: call <2 x i64> @_mm_set_epi32
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi64
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = load i64, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = load i64, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef %[[VAL1]], i64 noundef %[[VAL2]])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi8
 // CHECK-COUNT-16: load i8, i8* {{[0-9a-zA-Z_%.]+}}, align 1
-// CHECK-NEXT: [[REG1359:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_set_epi8
-// CHECK-NEXT: ret <2 x i64> [[REG1359]]
-
-// CHECK: define available_externally <2 x double> @_mm_setr_pd(double noundef [[REG1360:[0-9a-zA-Z_%.]+]], double noundef [[REG1361:[0-9a-zA-Z_%.]+]])
-// CHECK: store double [[REG1360]], double* [[REG1362:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store double [[REG1361]], double* [[REG1363:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1364:[0-9a-zA-Z_%.]+]] = load double, double* [[REG1362]], align 8
-// CHECK-NEXT: [[REG1365:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> undef, double [[REG1364]], i32 0
-// CHECK-NEXT: [[REG1366:[0-9a-zA-Z_%.]+]] = load double, double* [[REG1363]], align 8
-// CHECK-NEXT: [[REG1367:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG1365]], double [[REG1366]], i32 1
-// CHECK-NEXT: store <2 x double> [[REG1367]], <2 x double>* [[REG1368:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1369:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1368]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG1369]]
-
-// CHECK: define available_externally <2 x double> @_mm_setzero_pd()
-// CHECK: [[REG1370:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splats(int)(i32 noundef signext 0)
-// CHECK-NEXT: [[REG1371:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1370]] to <2 x double>
-// CHECK-NEXT: ret <2 x double> [[REG1371]]
-
-// CHECK: define available_externally <2 x i64> @_mm_setzero_si128()
-// CHECK: store <4 x i32> zeroinitializer, <4 x i32>* [[REG1372:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1373:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1372]], align 16
-// CHECK-NEXT: [[REG1374:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1373]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1374]]
+// CHECK: call <2 x i64> @_mm_set_epi8
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_setr_pd
+// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_setzero_pd()
+// CHECK: call <4 x i32> @vec_splats(int)(i32 noundef signext 0)
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_setzero_si128()
+// CHECK: store <4 x i32> zeroinitializer, <4 x i32>* %{{[0-9a-zA-Z_.]+}}, align 16
 
 void __attribute__((noinline))
 test_shuffle() {
@@ -2078,179 +984,75 @@ test_shuffle() {
 
 // CHECK-LABEL: @test_shuffle
 
-// CHECK: define available_externally <2 x i64> @_mm_shuffle_epi32(<2 x i64> noundef [[REG1375:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG1376:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1375]], <2 x i64>* [[REG1377:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG1376]], i32* [[REG1378:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG1379:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1378]], align 4
-// CHECK-NEXT: [[REG1380:[0-9a-zA-Z_%.]+]] = and i32 [[REG1379]], 3
-// CHECK-NEXT: [[REG1381:[0-9a-zA-Z_%.]+]] = sext i32 [[REG1380]] to i64
-// CHECK-NEXT: store i64 [[REG1381]], i64* [[REG1382:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1383:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1378]], align 4
-// CHECK-NEXT: [[REG1384:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG1383]], 2
-// CHECK-NEXT: [[REG1385:[0-9a-zA-Z_%.]+]] = and i32 [[REG1384]], 3
-// CHECK-NEXT: [[REG1386:[0-9a-zA-Z_%.]+]] = sext i32 [[REG1385]] to i64
-// CHECK-NEXT: store i64 [[REG1386]], i64* [[REG1387:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1388:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1378]], align 4
-// CHECK-NEXT: [[REG1389:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG1388]], 4
-// CHECK-NEXT: [[REG1390:[0-9a-zA-Z_%.]+]] = and i32 [[REG1389]], 3
-// CHECK-NEXT: [[REG1391:[0-9a-zA-Z_%.]+]] = sext i32 [[REG1390]] to i64
-// CHECK-NEXT: store i64 [[REG1391]], i64* [[REG1392:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1393:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1378]], align 4
-// CHECK-NEXT: [[REG1394:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG1393]], 6
-// CHECK-NEXT: [[REG1395:[0-9a-zA-Z_%.]+]] = and i32 [[REG1394]], 3
-// CHECK-NEXT: [[REG1396:[0-9a-zA-Z_%.]+]] = sext i32 [[REG1395]] to i64
-// CHECK-NEXT: store i64 [[REG1396]], i64* [[REG1397:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1398:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1382]], align 8
-// CHECK-NEXT: [[REG1399:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_epi32.permute_selectors, i64 0, i64 [[REG1398]]
-// CHECK-NEXT: [[REG1400:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1399]], align 4
-// CHECK-NEXT: [[REG1401:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1402:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1403:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG1401]], i32 [[REG1400]], i32 0
-// CHECK-NEXT: store <4 x i32> [[REG1403]], <4 x i32>* [[REG1402]], align 16
-// CHECK-NEXT: [[REG1404:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1387]], align 8
-// CHECK-NEXT: [[REG1405:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_epi32.permute_selectors, i64 0, i64 [[REG1404]]
-// CHECK-NEXT: [[REG1406:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1405]], align 4
-// CHECK-NEXT: [[REG1407:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1402]], align 16
-// CHECK-NEXT: [[REG1408:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG1407]], i32 [[REG1406]], i32 1
-// CHECK-NEXT: store <4 x i32> [[REG1408]], <4 x i32>* [[REG1402]], align 16
-// CHECK-NEXT: [[REG1409:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1392]], align 8
-// CHECK-NEXT: [[REG1410:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_epi32.permute_selectors, i64 0, i64 [[REG1409]]
-// CHECK-NEXT: [[REG1411:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1410]], align 4
-// CHECK-NEXT: [[REG1412:[0-9a-zA-Z_%.]+]] = add i32 [[REG1411]], 269488144
-// CHECK-NEXT: [[REG1413:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1402]], align 16
-// CHECK-NEXT: [[REG1414:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG1413]], i32 [[REG1412]], i32 2
-// CHECK-NEXT: store <4 x i32> [[REG1414]], <4 x i32>* [[REG1402]], align 16
-// CHECK-NEXT: [[REG1415:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG1397]], align 8
-// CHECK-NEXT: [[REG1416:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_epi32.permute_selectors, i64 0, i64 [[REG1415]]
-// CHECK-NEXT: [[REG1417:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1416]], align 4
-// CHECK-NEXT: [[REG1418:[0-9a-zA-Z_%.]+]] = add i32 [[REG1417]], 269488144
-// CHECK-NEXT: [[REG1419:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1402]], align 16
-// CHECK-NEXT: [[REG1420:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG1419]], i32 [[REG1418]], i32 3
-// CHECK-NEXT: store <4 x i32> [[REG1420]], <4 x i32>* [[REG1402]], align 16
-// CHECK-NEXT: [[REG1421:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1377]], align 16
-// CHECK-NEXT: [[REG1422:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1421]] to <4 x i32>
-// CHECK-NEXT: [[REG1423:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1377]], align 16
-// CHECK-NEXT: [[REG1424:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1423]] to <4 x i32>
-// CHECK-NEXT: [[REG1425:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1402]], align 16
-// CHECK-NEXT: [[REG1426:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1425]] to <16 x i8>
-// CHECK-NEXT: [[REG1427:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef [[REG1422]], <4 x i32> noundef [[REG1424]], <16 x i8> noundef [[REG1426]])
-// CHECK-NEXT: [[REG1428:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1427]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1428]]
-
-// CHECK: define available_externally <2 x double> @_mm_shuffle_pd(<2 x double> noundef [[REG1429:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1430:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG1431:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG1429]], <2 x double>* [[REG1432:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG1430]], <2 x double>* [[REG1433:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG1431]], i32* [[REG1434:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG1435:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1434]], align 4
-// CHECK-NEXT: [[REG1436:[0-9a-zA-Z_%.]+]] = and i32 [[REG1435]], 3
-// CHECK-NEXT: store i32 [[REG1436]], i32* [[REG1437:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG1438:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1437]], align 4
-// CHECK-NEXT: [[REG1439:[0-9a-zA-Z_%.]+]] = icmp eq i32 [[REG1438]], 0
-// CHECK-NEXT: br i1 [[REG1439]], label %[[REG1440:[0-9a-zA-Z_%.]+]], label %[[REG1441:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1440]]:
-// CHECK-NEXT: [[REG1442:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1432]], align 16
-// CHECK-NEXT: [[REG1443:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1433]], align 16
-// CHECK-NEXT: [[REG1444:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergeh(double vector[2], double vector[2])(<2 x double> noundef [[REG1442]], <2 x double> noundef [[REG1443]])
-// CHECK-NEXT: store <2 x double> [[REG1444]], <2 x double>* [[REG1445:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: br label %[[REG1446:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1441]]:
-// CHECK-NEXT: [[REG1447:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1437]], align 4
-// CHECK-NEXT: [[REG1448:[0-9a-zA-Z_%.]+]] = icmp eq i32 [[REG1447]], 1
-// CHECK-NEXT: br i1 [[REG1448]], label %[[REG1449:[0-9a-zA-Z_%.]+]], label %[[REG1450:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1449]]:
-// CHECK-NEXT: [[REG1451:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1433]], align 16
-// CHECK-NEXT: [[REG1452:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1432]], align 16
-// CHECK-NEXT: [[REG1453:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG1451]] to <2 x i64>
-// CHECK-NEXT: [[REG1454:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG1452]] to <2 x i64>
-// CHECK-NEXT: [[REG1455:[0-9a-zA-Z_%.]+]] = shufflevector <2 x i64> [[REG1453]], <2 x i64> [[REG1454]], <2 x i32> <i32 1, i32 2>
-// CHECK-NEXT: [[REG1456:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1455]] to <2 x double>
-// CHECK-NEXT: store <2 x double> [[REG1456]], <2 x double>* [[REG1445]], align 16
-// CHECK-NEXT: br label %[[REG1457:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1450]]:
-// CHECK-NEXT: [[REG1458:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1437]], align 4
-// CHECK-NEXT: [[REG1459:[0-9a-zA-Z_%.]+]] = icmp eq i32 [[REG1458]], 2
-// CHECK-NEXT: br i1 [[REG1459]], label %[[REG1449:[0-9a-zA-Z_%.]+]], label %[[REG1450:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1449]]:
-// CHECK-NEXT: [[REG1460:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1433]], align 16
-// CHECK-NEXT: [[REG1461:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1432]], align 16
-// CHECK-NEXT: [[REG1462:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG1460]] to <2 x i64>
-// CHECK-NEXT: [[REG1463:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG1461]] to <2 x i64>
-// CHECK-NEXT: [[REG1464:[0-9a-zA-Z_%.]+]] = shufflevector <2 x i64> [[REG1462]], <2 x i64> [[REG1463]], <2 x i32> <i32 0, i32 3>
-// CHECK-NEXT: [[REG1465:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1464]] to <2 x double>
-// CHECK-NEXT: store <2 x double> [[REG1465]], <2 x double>* [[REG1445]], align 16
-// CHECK-NEXT: br label %[[REG1466:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1450]]:
-// CHECK-NEXT: [[REG1467:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1432]], align 16
-// CHECK-NEXT: [[REG1468:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1433]], align 16
-// CHECK-NEXT: [[REG1469:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergel(double vector[2], double vector[2])(<2 x double> noundef [[REG1467]], <2 x double> noundef [[REG1468]])
-// CHECK-NEXT: store <2 x double> [[REG1469]], <2 x double>* [[REG1445]], align 16
-// CHECK-NEXT: br label %[[REG1466:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1466]]:
-// CHECK-NEXT: br label %[[REG1457:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1457]]:
-// CHECK-NEXT: br label %[[REG1446:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1446]]:
-// CHECK-NEXT: [[REG1470:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1445]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG1470]]
-
-// CHECK: define available_externally <2 x i64> @_mm_shufflehi_epi16(<2 x i64> noundef [[REG1471:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG1472:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1471]], <2 x i64>* [[REG1473:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG1472]], i32* [[REG1474:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG1475:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1474]], align 4
-// CHECK-NEXT: [[REG1476:[0-9a-zA-Z_%.]+]] = and i32 [[REG1475]], 3
-// CHECK-NEXT: [[REG1477:[0-9a-zA-Z_%.]+]] = sext i32 [[REG1476]] to i64
-// CHECK-NEXT: store i64 [[REG1477]], i64* [[REG1478:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1479:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1474]], align 4
-// CHECK-NEXT: [[REG1480:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG1479]], 2
-// CHECK-NEXT: [[REG1481:[0-9a-zA-Z_%.]+]] = and i32 [[REG1480]], 3
-// CHECK-NEXT: [[REG1482:[0-9a-zA-Z_%.]+]] = sext i32 [[REG1481]] to i64
-// CHECK-NEXT: store i64 [[REG1482]], i64* [[REG1483:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1484:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1474]], align 4
-// CHECK-NEXT: [[REG1485:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG1484]], 4
-// CHECK-NEXT: [[REG1486:[0-9a-zA-Z_%.]+]] = and i32 [[REG1485]], 3
-// CHECK-NEXT: [[REG1487:[0-9a-zA-Z_%.]+]] = sext i32 [[REG1486]] to i64
-// CHECK-NEXT: store i64 [[REG1487]], i64* [[REG1488:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG1489:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1474]], align 4
-// CHECK-NEXT: [[REG1490:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG1489]], 6
-// CHECK-NEXT: [[REG1491:[0-9a-zA-Z_%.]+]] = and i32 [[REG1490]], 3
-// CHECK-NEXT: [[REG1492:[0-9a-zA-Z_%.]+]] = sext i32 [[REG1491]] to i64
-// CHECK-NEXT: store i64 [[REG1492]], i64* [[REG1493:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-LE-NEXT: store <2 x i64> <i64 1663540288323457296, i64 0>, <2 x i64>* [[REG1494:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-BE-NEXT: store <2 x i64> <i64 1157726452361532951, i64 0>, <2 x i64>* [[REG1494:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_shuffle_epi32
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
+// CHECK: sext i32 %[[AND]] to i64
+// CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2
+// CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
+// CHECK: sext i32 %[[AND2]] to i64
+// CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4
+// CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
+// CHECK: sext i32 %[[AND3]] to i64
+// CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
+// CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
+// CHECK: sext i32 %[[AND4]] to i64
+// CHECK: getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_epi32.permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_epi32.permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1
+// CHECK: getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_epi32.permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
+// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2
+// CHECK: getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_epi32.permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK: add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
+// CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_shuffle_pd
+// CHECK: and i32 %{{[0-9a-zA-Z_.]+}}, 3
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, 0
+// CHECK: br i1 %[[CMP]]
+// CHECK: call <2 x double> @vec_mergeh(double vector[2], double vector[2])
+// CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, 1
+// CHECK: br i1 %[[CMP2]]
+// CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 1, i32 2>
+// CHECK: %[[CMP3:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, 2
+// CHECK: br i1 %[[CMP3]]
+// CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 0, i32 3>
+// CHECK: call <2 x double> @vec_mergel(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_shufflehi_epi16
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
+// CHECK: sext i32 %[[AND]] to i64
+// CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2
+// CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
+// CHECK: sext i32 %[[AND2]] to i64
+// CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4
+// CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
+// CHECK: sext i32 %[[AND3]] to i64
+// CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
+// CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
+// CHECK: sext i32 %[[AND4]] to i64
+// CHECK-LE: store <2 x i64> <i64 1663540288323457296, i64 0>, <2 x i64>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK-BE: store <2 x i64> <i64 1157726452361532951, i64 0>, <2 x i64>* %{{[0-9a-zA-Z_.]+}}, align 16
 // CHECK-COUNT-4: getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shufflehi_epi16.permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
-// CHECK: [[REG1495:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
-// CHECK-NEXT: store <2 x i64> [[REG1495]], <2 x i64>* [[REG1496:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1497:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1496]], align 16
-// CHECK-NEXT: ret <2 x i64> [[REG1497]]
-
-// CHECK: define available_externally <2 x i64> @_mm_shufflelo_epi16
-// CHECK: [[REG1498:[0-9a-zA-Z_%.]+]] = and i32 {{[0-9a-zA-Z_%.]+}}, 3
-// CHECK-NEXT: sext i32 [[REG1498]] to i64
-// CHECK: [[REG1499:[0-9a-zA-Z_%.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 2
-// CHECK-NEXT: [[REG1500:[0-9a-zA-Z_%.]+]] = and i32 [[REG1499]], 3
-// CHECK-NEXT: sext i32 [[REG1500]] to i64
-// CHECK: [[REG1501:[0-9a-zA-Z_%.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 4
-// CHECK-NEXT: [[REG1502:[0-9a-zA-Z_%.]+]] = and i32 [[REG1501]], 3
-// CHECK-NEXT: [[REG1503:[0-9a-zA-Z_%.]+]] = sext i32 [[REG1502]] to i64
-// CHECK: [[REG1504:[0-9a-zA-Z_%.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 6
-// CHECK-NEXT: [[REG1505:[0-9a-zA-Z_%.]+]] = and i32 [[REG1504]], 3
-// CHECK-NEXT: [[REG1506:[0-9a-zA-Z_%.]+]] = sext i32 [[REG1505]] to i64
-// CHECK-NEXT: store i64 [[REG1506]], i64* [[REG1507:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-LE-NEXT: store <2 x i64> <i64 0, i64 2242261671028070680>, <2 x i64>* [[REG1508:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-BE-NEXT: store <2 x i64> <i64 0, i64 1736447835066146335>, <2 x i64>* [[REG1508:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-COUNT-4: [[REG1509:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shufflelo_epi16.permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
-// CHECK: [[REG1510:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
-// CHECK-NEXT: store <2 x i64> [[REG1510]], <2 x i64>* [[REG1511:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1512:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1511]], align 16
-// CHECK-NEXT: ret <2 x i64> [[REG1512]]
+// CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_shufflelo_epi16
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 {{[0-9a-zA-Z_%.]+}}, 3
+// CHECK: sext i32 %[[AND]] to i64
+// CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 2
+// CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
+// CHECK: sext i32 %[[AND2]] to i64
+// CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 4
+// CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
+// CHECK: sext i32 %[[AND3]] to i64
+// CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 6
+// CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
+// CHECK: sext i32 %[[AND4]] to i64
+// CHECK-LE: store <2 x i64> <i64 0, i64 2242261671028070680>, <2 x i64>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK-BE: store <2 x i64> <i64 0, i64 1736447835066146335>, <2 x i64>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK-COUNT-4: getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shufflelo_epi16.permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
+// CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
 
 void __attribute__((noinline))
 test_sll() {
@@ -2265,223 +1067,74 @@ test_sll() {
 
 // CHECK-LABEL: @test_sll
 
-// CHECK: define available_externally <2 x i64> @_mm_sll_epi16(<2 x i64> noundef [[REG1513:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1514:[0-9a-zA-Z_%.]+]])
-// CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, <8 x i16>* [[REG1515:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-LE: [[REG1516:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)
-// CHECK-BE: [[REG1516:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)
-// CHECK-NEXT: store <8 x i16> [[REG1516]], <8 x i16>* [[REG1517:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1518:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1517]], align 16
-// CHECK-NEXT: [[REG1519:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG1518]], <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
-// CHECK-NEXT: store <8 x i16> [[REG1519]], <8 x i16>* [[REG1520:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG1521:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sl(unsigned short vector[8], unsigned short vector[8])
-// CHECK-NEXT: store <8 x i16> [[REG1521]], <8 x i16>* [[REG1522:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1523:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1520]], align 16
-// CHECK-NEXT: [[REG1524:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1522]], align 16
-// CHECK-NEXT: [[REG1525:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1520]], align 16
-// CHECK-NEXT: [[REG1526:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8])(<8 x i16> noundef [[REG1523]], <8 x i16> noundef [[REG1524]], <8 x i16> noundef [[REG1525]])
-// CHECK-NEXT: store <8 x i16> [[REG1526]], <8 x i16>* [[REG1522]], align 16
-// CHECK-NEXT: [[REG1527:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1522]], align 16
-// CHECK-NEXT: [[REG1528:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1527]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1528]]
-
-// CHECK: define available_externally <2 x i64> @_mm_sll_epi32(<2 x i64> noundef [[REG1529:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1530:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1529]], <2 x i64>* [[REG1531:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1530]], <2 x i64>* [[REG1532:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x i32> <i32 32, i32 32, i32 32, i32 32>, <4 x i32>* [[REG1533:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1534:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1532]], align 16
-// CHECK-NEXT: [[REG1535:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1534]] to <4 x i32>
-// CHECK-LE-NEXT: [[REG1536:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef [[REG1535]], i32 noundef zeroext 0)
-// CHECK-BE-NEXT: [[REG1536:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef [[REG1535]], i32 noundef zeroext 1)
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi16
+// CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, <8 x i16>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)
+// CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)
+// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+// CHECK: call <8 x i16> @vec_sl(unsigned short vector[8], unsigned short vector[8])
+// CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi32
+// CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1)
 // CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 32, i32 32, i32 32, i32 32>)
-// CHECK: [[REG1537:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sl(unsigned int vector[4], unsigned int vector[4])
-// CHECK-NEXT: store <4 x i32> [[REG1537]], <4 x i32>* [[REG1538:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1539:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1540:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1538]], align 16
-// CHECK-NEXT: [[REG1541:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1542:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4])(<4 x i32> noundef [[REG1539]], <4 x i32> noundef [[REG1540]], <4 x i32> noundef [[REG1541]])
-// CHECK-NEXT: store <4 x i32> [[REG1542]], <4 x i32>* [[REG1538]], align 16
-// CHECK-NEXT: [[REG1543:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1538]], align 16
-// CHECK-NEXT: [[REG1544:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1543]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1544]]
-
-// CHECK: define available_externally <2 x i64> @_mm_sll_epi64
+// CHECK: call <4 x i32> @vec_sl(unsigned int vector[4], unsigned int vector[4])
+// CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi64
 // CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef zeroext 0)
 // CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, <2 x i64> noundef <i64 64, i64 64>)
 // CHECK: call <2 x i64> @vec_sl(unsigned long long vector[2], unsigned long long vector[2])
-// CHECK: [[REG1545:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_sel(double vector[2], double vector[2], bool vector[2])
-// CHECK-NEXT: [[REG1546:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG1545]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG1546]], <2 x i64>* [[REG1547:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1548:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1547]], align 16
-// CHECK-NEXT: ret <2 x i64> [[REG1548]]
-
-// CHECK: define available_externally <2 x i64> @_mm_slli_epi16(<2 x i64> noundef [[REG1549:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG1550:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1549]], <2 x i64>* [[REG1551:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG1550]], i32* [[REG1552:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store <8 x i16> zeroinitializer, <8 x i16>* [[REG1553:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1554:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1552]], align 4
-// CHECK-NEXT: [[REG1555:[0-9a-zA-Z_%.]+]] = icmp sge i32 [[REG1554]], 0
-// CHECK-NEXT: br i1 [[REG1555]], label %[[REG1556:[0-9a-zA-Z_%.]+]], label %[[REG1557:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1556]]:
-// CHECK-NEXT: [[REG1558:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1552]], align 4
-// CHECK-NEXT: [[REG1559:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1558]], 16
-// CHECK-NEXT: br i1 [[REG1559]], label %[[REG1560:[0-9a-zA-Z_%.]+]], label %[[REG1557:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1560]]:
-// CHECK-NEXT: load
-// CHECK-NEXT: call i1 @llvm.is.constant
-// CHECK-NEXT: br i1 %[[REG1561a:[0-9a-zA-Z_%.]+]], label %[[REG1561:[0-9a-zA-Z_%.]+]], label %[[REG1562:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1561]]:
-// CHECK-NEXT: [[REG1563:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1552]], align 4
-// CHECK-NEXT: [[REG1564:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG1563]] to i8
-// CHECK-NEXT: [[REG1565:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat_s16(signed char)(i8 noundef signext [[REG1564]])
-// CHECK-NEXT: store <8 x i16> [[REG1565]], <8 x i16>* [[REG1566:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: br label %[[REG1567:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1562]]:
-// CHECK-NEXT: [[REG1568:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1552]], align 4
-// CHECK-NEXT: [[REG1569:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG1568]] to i16
-// CHECK-NEXT: [[REG1570:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext [[REG1569]])
-// CHECK-NEXT: store <8 x i16> [[REG1570]], <8 x i16>* [[REG1566]], align 16
-// CHECK-NEXT: br label %[[REG1567:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1567]]:
-// CHECK-NEXT: [[REG1571:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1551]], align 16
-// CHECK-NEXT: [[REG1572:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1571]] to <8 x i16>
-// CHECK-NEXT: [[REG1573:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1566]], align 16
-// CHECK-NEXT: [[REG1574:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sl(short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG1572]], <8 x i16> noundef [[REG1573]])
-// CHECK-NEXT: store <8 x i16> [[REG1574]], <8 x i16>* [[REG1553]], align 16
-// CHECK-NEXT: br label %[[REG1557:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1557]]:
-// CHECK-NEXT: [[REG1575:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1553]], align 16
-// CHECK-NEXT: [[REG1576:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1575]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1576]]
-
-// CHECK: define available_externally <2 x i64> @_mm_slli_epi32(<2 x i64> noundef [[REG1577:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG1578:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1577]], <2 x i64>* [[REG1579:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG1578]], i32* [[REG1580:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[REG1581:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1582:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1580]], align 4
-// CHECK-NEXT: [[REG1583:[0-9a-zA-Z_%.]+]] = icmp sge i32 [[REG1582]], 0
-// CHECK-NEXT: br i1 [[REG1583]], label %[[REG1584:[0-9a-zA-Z_%.]+]], label %[[REG1585:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1584]]:
-// CHECK-NEXT: [[REG1586:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1580]], align 4
-// CHECK-NEXT: [[REG1587:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1586]], 32
-// CHECK-NEXT: br i1 [[REG1587]], label %[[REG1588:[0-9a-zA-Z_%.]+]], label %[[REG1585:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1588]]:
-// CHECK-NEXT: load
-// CHECK-NEXT: call i1 @llvm.is.constant
-// CHECK-NEXT: br i1 %{{[0-9a-zA-Z_%.]+}}, label %[[REG1589:[0-9a-zA-Z_%.]+]], label %[[REG1590:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1589]]:
-// CHECK-NEXT: [[REG1591:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1580]], align 4
-// CHECK-NEXT: [[REG1592:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1591]], 16
-// CHECK-NEXT: br i1 [[REG1592]], label %[[REG1593:[0-9a-zA-Z_%.]+]], label %[[REG1590:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1593]]:
-// CHECK-NEXT: [[REG1594:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1580]], align 4
-// CHECK-NEXT: [[REG1595:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG1594]] to i8
-// CHECK-NEXT: [[REG1596:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext [[REG1595]])
-// CHECK-NEXT: store <4 x i32> [[REG1596]], <4 x i32>* [[REG1597:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: br label %[[REG1598:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1590]]:
-// CHECK-NEXT: [[REG1599:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1580]], align 4
-// CHECK-NEXT: [[REG1600:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext [[REG1599]])
-// CHECK-NEXT: store <4 x i32> [[REG1600]], <4 x i32>* [[REG1597]], align 16
-// CHECK-NEXT: br label %[[REG1598:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1598]]:
-// CHECK-NEXT: [[REG1601:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1579]], align 16
-// CHECK-NEXT: [[REG1602:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1601]] to <4 x i32>
-// CHECK-NEXT: [[REG1603:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1597]], align 16
-// CHECK-NEXT: [[REG1604:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sl(int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG1602]], <4 x i32> noundef [[REG1603]])
-// CHECK-NEXT: store <4 x i32> [[REG1604]], <4 x i32>* [[REG1581]], align 16
-// CHECK-NEXT: br label %[[REG1585:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1585]]:
-// CHECK-NEXT: [[REG1605:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1581]], align 16
-// CHECK-NEXT: [[REG1606:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1605]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1606]]
-
-// CHECK: define available_externally <2 x i64> @_mm_slli_epi64(<2 x i64> noundef [[REG1607:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG1608:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1607]], <2 x i64>* [[REG1609:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG1608]], i32* [[REG1610:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[REG1611:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1612:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1610]], align 4
-// CHECK-NEXT: [[REG1613:[0-9a-zA-Z_%.]+]] = icmp sge i32 [[REG1612]], 0
-// CHECK-NEXT: br i1 [[REG1613]], label %[[REG1614:[0-9a-zA-Z_%.]+]], label %[[REG1615:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1614]]:
-// CHECK-NEXT: [[REG1616:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1610]], align 4
-// CHECK-NEXT: [[REG1617:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1616]], 64
-// CHECK-NEXT: br i1 [[REG1617]], label %[[REG1618:[0-9a-zA-Z_%.]+]], label %[[REG1615:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1618]]:
-// CHECK-NEXT: load
-// CHECK-NEXT: call i1 @llvm.is.constant
-// CHECK-NEXT: br i1 %{{[0-9a-zA-Z_%.]+}}, label %[[REG1619:[0-9a-zA-Z_%.]+]], label %[[REG1620:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1619]]:
-// CHECK-NEXT: [[REG1621:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1610]], align 4
-// CHECK-NEXT: [[REG1622:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1621]], 16
-// CHECK-NEXT: br i1 [[REG1622]], label %[[REG1623:[0-9a-zA-Z_%.]+]], label %[[REG1620:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1623]]:
-// CHECK-NEXT: [[REG1624:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1610]], align 4
-// CHECK-NEXT: [[REG1625:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG1624]] to i8
-// CHECK-NEXT: [[REG1626:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext [[REG1625]])
-// CHECK-NEXT: [[REG1627:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1626]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG1627]], <2 x i64>* [[REG1628:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: br label %[[REG1629:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1620]]:
-// CHECK-NEXT: [[REG1630:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1610]], align 4
-// CHECK-NEXT: [[REG1631:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext [[REG1630]])
-// CHECK-NEXT: [[REG1632:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1631]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG1632]], <2 x i64>* [[REG1628]], align 16
-// CHECK-NEXT: br label %[[REG1629:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1629]]:
-// CHECK-NEXT: [[REG1633:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1609]], align 16
-// CHECK-NEXT: [[REG1634:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1628]], align 16
-// CHECK-NEXT: [[REG1635:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_sl(long long vector[2], unsigned long long vector[2])(<2 x i64> noundef [[REG1633]], <2 x i64> noundef [[REG1634]])
-// CHECK-NEXT: store <2 x i64> [[REG1635]], <2 x i64>* [[REG1611]], align 16
-// CHECK-NEXT: br label %[[REG1615:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1615]]:
-// CHECK-NEXT: [[REG1636:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1611]], align 16
-// CHECK-NEXT: ret <2 x i64> [[REG1636]]
-
-// CHECK: define available_externally <2 x i64> @_mm_slli_si128(<2 x i64> noundef [[REG1637:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG1638:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1637]], <2 x i64>* [[REG1639:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG1638]], i32* [[REG1640:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[REG1641:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1642:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1640]], align 4
-// CHECK-NEXT: [[REG1643:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1642]], 16
-// CHECK-NEXT: br i1 [[REG1643]], label %[[REG1644:[0-9a-zA-Z_%.]+]], label %[[REG1645:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1644]]:
-// CHECK-NEXT: [[REG1646:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1639]], align 16
-// CHECK-NEXT: [[REG1647:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1646]] to <16 x i8>
-// CHECK-NEXT: [[REG1648:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1640]], align 4
-// CHECK-BE-NEXT: [[REG1649:[0-9a-zA-Z_%.]+]] = sub nsw i32 16, [[REG1648]]
-// CHECK-BE-NEXT: [[REG1650:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef zeroinitializer, <16 x i8> noundef [[REG1647]], i32 noundef zeroext [[REG1649]])
-// CHECK-LE-NEXT: [[REG1650:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef [[REG1647]], <16 x i8> noundef zeroinitializer, i32 noundef zeroext [[REG1648]])
-// CHECK-NEXT: store <16 x i8> [[REG1650]], <16 x i8>* [[REG1651:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: br label %[[REG1652:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG1645]]:
-// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[REG1651]], align 16
-// CHECK-NEXT: br label %[[REG1652:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1652]]:
-// CHECK-NEXT: [[REG1653:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG1651]], align 16
-// CHECK-NEXT: [[REG1654:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG1653]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1654]]
+// CHECK: call <2 x double> @vec_sel(double vector[2], double vector[2], bool vector[2])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_epi16
+// CHECK: store <8 x i16> zeroinitializer, <8 x i16>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp sge i32 %{{[0-9a-zA-Z_.]+}}, 0
+// CHECK: br i1 %[[CMP]]
+// CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
+// CHECK: br i1 %[[CMP2]]
+// CHECK: call i1 @llvm.is.constant
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
+// CHECK: call <8 x i16> @vec_splat_s16(signed char)(i8 noundef signext %[[TRUNC]])
+// CHECK: %[[TRUNC2:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i16
+// CHECK: call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext %[[TRUNC2]])
+// CHECK: call <8 x i16> @vec_sl(short vector[8], unsigned short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_epi32
+// CHECK: store <4 x i32> zeroinitializer, <4 x i32>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp sge i32 %{{[0-9a-zA-Z_.]+}}, 0
+// CHECK: br i1 %[[CMP]]
+// CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32
+// CHECK: br i1 %[[CMP2]]
+// CHECK: call i1 @llvm.is.constant
+// CHECK: %[[CMP3:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
+// CHECK: br i1 %[[CMP3]]
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
+// CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]])
+// CHECK: call <4 x i32> @vec_splats(unsigned int)
+// CHECK: call <4 x i32> @vec_sl(int vector[4], unsigned int vector[4])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_epi64
+// CHECK: store <2 x i64> zeroinitializer, <2 x i64>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp sge i32 %{{[0-9a-zA-Z_.]+}}, 0
+// CHECK: br i1 %[[CMP]]
+// CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 64
+// CHECK: br i1 %[[CMP2]]
+// CHECK: call i1 @llvm.is.constant
+// CHECK: %[[CMP3:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
+// CHECK: br i1 %[[CMP3]]
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
+// CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]])
+// CHECK: call <4 x i32> @vec_splats(unsigned int)
+// CHECK: call <2 x i64> @vec_sl(long long vector[2], unsigned long long vector[2])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_si128
+// CHECK: store <16 x i8> zeroinitializer, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK-BE: %[[SUB:[0-9a-zA-Z_.]+]] = sub nsw i32 16, %{{[0-9a-zA-Z_.]+}}
+// CHECK-BE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext %[[SUB]])
+// CHECK-LE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer, i32 noundef zeroext %{{[0-9a-zA-Z_.]+}})
+// CHECK: store <16 x i8> zeroinitializer, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
 
 void __attribute__((noinline))
 test_sqrt() {
@@ -2491,24 +1144,13 @@ test_sqrt() {
 
 // CHECK-LABEL: @test_sqrt
 
-// CHECK: define available_externally <2 x double> @_mm_sqrt_pd
-// CHECK: [[REG1655:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_sqrt(double vector[2])(<2 x double> noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: ret <2 x double> [[REG1655]]
-
-// CHECK: define available_externally <2 x double> @_mm_sqrt_sd(<2 x double> noundef [[REG1656:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1657:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG1656]], <2 x double>* [[REG1658:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG1657]], <2 x double>* [[REG1659:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1660:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1659]], align 16
-// CHECK-NEXT: [[REG1661:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1660]], i32 0
-// CHECK-NEXT: [[REG1662:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_set1_pd(double noundef [[REG1661]])
-// CHECK-NEXT: [[REG1663:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_sqrt(double vector[2])(<2 x double> noundef [[REG1662]])
-// CHECK-NEXT: store <2 x double> [[REG1663]], <2 x double>* [[REG1664:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1665:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1664]], align 16
-// CHECK-NEXT: [[REG1666:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1665]], i32 0
-// CHECK-NEXT: [[REG1667:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1658]], align 16
-// CHECK-NEXT: [[REG1668:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1667]], i32 1
-// CHECK-NEXT: [[REG1669:[0-9a-zA-Z_%.]+]] = call <2 x double> @_mm_setr_pd(double noundef [[REG1666]], double noundef [[REG1668]])
-// CHECK-NEXT: ret <2 x double> [[REG1669]]
+// CHECK-LABEL: define available_externally <2 x double> @_mm_sqrt_pd
+// CHECK: call <2 x double> @vec_sqrt(double vector[2])(<2 x double> noundef {{[0-9a-zA-Z_%.]+}})
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_sqrt_sd
+// CHECK: %[[CALL:[0-9a-zA-Z_.]+]] = call <2 x double> @_mm_set1_pd(double noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <2 x double> @vec_sqrt(double vector[2])(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
 
 void __attribute__((noinline))
 test_sra() {
@@ -2520,140 +1162,43 @@ test_sra() {
 
 // CHECK-LABEL: @test_sra
 
-// CHECK: define available_externally <2 x i64> @_mm_sra_epi16(<2 x i64> noundef [[REG1670:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1671:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1670]], <2 x i64>* [[REG1672:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1671]], <2 x i64>* [[REG1673:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, <8 x i16>* [[REG1674:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1675:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1673]], align 16
-// CHECK-NEXT: [[REG1676:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1675]] to <8 x i16>
-// CHECK-LE-NEXT: [[REG1677:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef [[REG1676]], i32 noundef zeroext 0)
-// CHECK-BE-NEXT: [[REG1677:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef [[REG1676]], i32 noundef zeroext 3)
-// CHECK-NEXT: store <8 x i16> [[REG1677]], <8 x i16>* [[REG1678:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1679:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1678]], align 16
-// CHECK-NEXT: [[REG1680:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_min(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG1679]], <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
-// CHECK-NEXT: store <8 x i16> [[REG1680]], <8 x i16>* [[REG1678]], align 16
-// CHECK-NEXT: [[REG1681:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1672]], align 16
-// CHECK-NEXT: [[REG1682:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1681]] to <8 x i16>
-// CHECK-NEXT: [[REG1683:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1678]], align 16
-// CHECK-NEXT: [[REG1684:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG1682]], <8 x i16> noundef [[REG1683]])
-// CHECK-NEXT: store <8 x i16> [[REG1684]], <8 x i16>* [[REG1685:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1686:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1685]], align 16
-// CHECK-NEXT: [[REG1687:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1686]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1687]]
-
-// CHECK: define available_externally <2 x i64> @_mm_sra_epi32(<2 x i64> noundef [[REG1688:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1689:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1688]], <2 x i64>* [[REG1690:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1689]], <2 x i64>* [[REG1691:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x i32> <i32 31, i32 31, i32 31, i32 31>, <4 x i32>* [[REG1692:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1693:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1691]], align 16
-// CHECK-NEXT: [[REG1694:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1693]] to <4 x i32>
-// CHECK-LE-NEXT: [[REG1695:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef [[REG1694]], i32 noundef zeroext 0)
-// CHECK-BE-NEXT: [[REG1695:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef [[REG1694]], i32 noundef zeroext 1)
-// CHECK-NEXT: store <4 x i32> [[REG1695]], <4 x i32>* [[REG1696:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1697:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1696]], align 16
-// CHECK-NEXT: [[REG1698:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_min(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG1697]], <4 x i32> noundef <i32 31, i32 31, i32 31, i32 31>)
-// CHECK-NEXT: store <4 x i32> [[REG1698]], <4 x i32>* [[REG1696]], align 16
-// CHECK-NEXT: [[REG1699:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1690]], align 16
-// CHECK-NEXT: [[REG1700:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1699]] to <4 x i32>
-// CHECK-NEXT: [[REG1701:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1696]], align 16
-// CHECK-NEXT: [[REG1702:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG1700]], <4 x i32> noundef [[REG1701]])
-// CHECK-NEXT: store <4 x i32> [[REG1702]], <4 x i32>* [[REG1703:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1704:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1703]], align 16
-// CHECK-NEXT: [[REG1705:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1704]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1705]]
-
-// CHECK: define available_externally <2 x i64> @_mm_srai_epi16(<2 x i64> noundef [[REG1706:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG1707:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1706]], <2 x i64>* [[REG1708:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG1707]], i32* [[REG1709:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, <8 x i16>* [[REG1710:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1711:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1709]], align 4
-// CHECK-NEXT: [[REG1712:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1711]], 16
-// CHECK-NEXT: br i1 [[REG1712]], label %[[REG1713:[0-9a-zA-Z_%.]+]], label %[[REG1714:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1713]]:
-// CHECK-NEXT: load
-// CHECK-NEXT: call i1 @llvm.is.constant
-// CHECK-NEXT: br i1 %[[REG1715a:[0-9a-zA-Z_%.]+]], label %[[REG1715:[0-9a-zA-Z_%.]+]], label %[[REG1716:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1715]]:
-// CHECK-NEXT: [[REG1717:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1709]], align 4
-// CHECK-NEXT: [[REG1718:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG1717]] to i8
-// CHECK-NEXT: [[REG1719:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat_s16(signed char)(i8 noundef signext [[REG1718]])
-// CHECK-NEXT: store <8 x i16> [[REG1719]], <8 x i16>* [[REG1710]], align 16
-// CHECK-NEXT: br label %[[REG1720:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1716]]:
-// CHECK-NEXT: [[REG1721:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1709]], align 4
-// CHECK-NEXT: [[REG1722:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG1721]] to i16
-// CHECK-NEXT: [[REG1723:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext [[REG1722]])
-// CHECK-NEXT: store <8 x i16> [[REG1723]], <8 x i16>* [[REG1710]], align 16
-// CHECK-NEXT: br label %[[REG1720:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1720]]:
-// CHECK-NEXT: br label %[[REG1714:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1714]]:
-// CHECK-NEXT: [[REG1724:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1708]], align 16
-// CHECK-NEXT: [[REG1725:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1724]] to <8 x i16>
-// CHECK-NEXT: [[REG1726:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1710]], align 16
-// CHECK-NEXT: [[REG1727:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG1725]], <8 x i16> noundef [[REG1726]])
-// CHECK-NEXT: store <8 x i16> [[REG1727]], <8 x i16>* [[REG1728:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1729:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1728]], align 16
-// CHECK-NEXT: [[REG1730:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1729]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1730]]
-
-// CHECK: define available_externally <2 x i64> @_mm_srai_epi32(<2 x i64> noundef [[REG1731:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG1732:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1731]], <2 x i64>* [[REG1733:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG1732]], i32* [[REG1734:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store <4 x i32> <i32 31, i32 31, i32 31, i32 31>, <4 x i32>* [[REG1735:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1736:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1734]], align 4
-// CHECK-NEXT: [[REG1737:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1736]], 32
-// CHECK-NEXT: br i1 [[REG1737]], label %[[REG1738:[0-9a-zA-Z_%.]+]], label %[[REG1739:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1738]]:
-// CHECK-NEXT: load
-// CHECK-NEXT: call i1 @llvm.is.constant
-// CHECK-NEXT: br i1 %[[REG1738:[0-9a-zA-Z_%.]+]], label %[[REG1740:[0-9a-zA-Z_%.]+]], label %[[REG1741:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1740]]:
-// CHECK-NEXT: [[REG1742:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1734]], align 4
-// CHECK-NEXT: [[REG1743:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1742]], 16
-// CHECK-NEXT: br i1 [[REG1743]], label %[[REG1744:[0-9a-zA-Z_%.]+]], label %[[REG1745:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1744]]:
-// CHECK-NEXT: [[REG1746:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1734]], align 4
-// CHECK-NEXT: [[REG1747:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG1746]] to i8
-// CHECK-NEXT: [[REG1748:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext [[REG1747]])
-// CHECK-NEXT: store <4 x i32> [[REG1748]], <4 x i32>* [[REG1735]], align 16
-// CHECK-NEXT: br label %[[REG1749:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1745]]:
-// CHECK-NEXT: [[REG1750:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1734]], align 4
-// CHECK-NEXT: [[REG1751:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext [[REG1750]])
-// CHECK-NEXT: store <4 x i32> [[REG1751]], <4 x i32>* [[REG1735]], align 16
-// CHECK-NEXT: br label %[[REG1749:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1749]]:
-// CHECK-NEXT: br label %[[REG1752:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1741]]:
-// CHECK-NEXT: [[REG1753:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1734]], align 4
-// CHECK-NEXT: [[REG1754:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext [[REG1753]])
-// CHECK-NEXT: store <4 x i32> [[REG1754]], <4 x i32>* [[REG1735]], align 16
-// CHECK-NEXT: br label %[[REG1752:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1752]]:
-// CHECK-NEXT: br label %[[REG1739:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1739]]:
-// CHECK-NEXT: [[REG1755:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1733]], align 16
-// CHECK-NEXT: [[REG1756:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1755]] to <4 x i32>
-// CHECK-NEXT: [[REG1757:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1735]], align 16
-// CHECK-NEXT: [[REG1758:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG1756]], <4 x i32> noundef [[REG1757]])
-// CHECK-NEXT: store <4 x i32> [[REG1758]], <4 x i32>* [[REG1759:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1760:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1759]], align 16
-// CHECK-NEXT: [[REG1761:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1760]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1761]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi16
+// CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, <8 x i16>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3)
+// CHECK: call <8 x i16> @vec_min(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+// CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi32
+// CHECK: store <4 x i32> <i32 31, i32 31, i32 31, i32 31>, <4 x i32>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1)
+// CHECK: call <4 x i32> @vec_min(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 31, i32 31, i32 31, i32 31>)
+// CHECK: call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi16
+// CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, <8 x i16>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
+// CHECK: br i1 %[[CMP]]
+// CHECK: call i1 @llvm.is.constant
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
+// CHECK: call <8 x i16> @vec_splat_s16(signed char)(i8 noundef signext %[[TRUNC]])
+// CHECK: %[[TRUNC2:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i16
+// CHECK: call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi32
+// CHECK: store <4 x i32> <i32 31, i32 31, i32 31, i32 31>, <4 x i32>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32
+// CHECK: br i1 %[[CMP]]
+// CHECK: call i1 @llvm.is.constant
+// CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
+// CHECK: br i1 %[[CMP2]]
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
+// CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]])
+// CHECK: call <4 x i32> @vec_splats(unsigned int)
+// CHECK: call <4 x i32> @vec_splats(unsigned int)
+// CHECK: call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4])
 
 void __attribute__((noinline))
 test_srl() {
@@ -2668,228 +1213,67 @@ test_srl() {
 
 // CHECK-LABEL: @test_srl
 
-// CHECK: define available_externally <2 x i64> @_mm_srl_epi16(<2 x i64> noundef [[REG1762:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1763:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1762]], <2 x i64>* [[REG1764:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1763]], <2 x i64>* [[REG1765:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, <8 x i16>* [[REG1766:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1767:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1765]], align 16
-// CHECK-NEXT: [[REG1768:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1767]] to <8 x i16>
-// CHECK-LE-NEXT: [[REG1769:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef [[REG1768]], i32 noundef zeroext 0)
-// CHECK-BE-NEXT: [[REG1769:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef [[REG1768]], i32 noundef zeroext 3)
-// CHECK-NEXT: store <8 x i16> [[REG1769]], <8 x i16>* [[REG1770:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1771:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1770]], align 16
-// CHECK-NEXT: [[REG1772:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG1771]], <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
-// CHECK-NEXT: store <8 x i16> [[REG1772]], <8 x i16>* [[REG1773:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1774:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1764]], align 16
-// CHECK-NEXT: [[REG1775:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1774]] to <8 x i16>
-// CHECK-NEXT: [[REG1776:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1770]], align 16
-// CHECK-NEXT: [[REG1777:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG1775]], <8 x i16> noundef [[REG1776]])
-// CHECK-NEXT: store <8 x i16> [[REG1777]], <8 x i16>* [[REG1778:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1779:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1773]], align 16
-// CHECK-NEXT: [[REG1780:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1778]], align 16
-// CHECK-NEXT: [[REG1781:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1773]], align 16
-// CHECK-NEXT: [[REG1782:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8])(<8 x i16> noundef [[REG1779]], <8 x i16> noundef [[REG1780]], <8 x i16> noundef [[REG1781]])
-// CHECK-NEXT: store <8 x i16> [[REG1782]], <8 x i16>* [[REG1778]], align 16
-// CHECK-NEXT: [[REG1783:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1778]], align 16
-// CHECK-NEXT: [[REG1784:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1783]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1784]]
-
-// CHECK: define available_externally <2 x i64> @_mm_srl_epi32(<2 x i64> noundef [[REG1785:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1786:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1785]], <2 x i64>* [[REG1787:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1786]], <2 x i64>* [[REG1788:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x i32> <i32 32, i32 32, i32 32, i32 32>, <4 x i32>* [[REG1789:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1790:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1788]], align 16
-// CHECK-NEXT: [[REG1791:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1790]] to <4 x i32>
-// CHECK-LE-NEXT: [[REG1792:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef [[REG1791]], i32 noundef zeroext 0)
-// CHECK-BE-NEXT: [[REG1792:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef [[REG1791]], i32 noundef zeroext 1)
-// CHECK-NEXT: store <4 x i32> [[REG1792]], <4 x i32>* [[REG1793:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1794:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1793]], align 16
-// CHECK-NEXT: [[REG1795:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG1794]], <4 x i32> noundef <i32 32, i32 32, i32 32, i32 32>)
-// CHECK-NEXT: store <4 x i32> [[REG1795]], <4 x i32>* [[REG1796:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1797:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1787]], align 16
-// CHECK-NEXT: [[REG1798:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1797]] to <4 x i32>
-// CHECK-NEXT: [[REG1799:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1793]], align 16
-// CHECK-NEXT: [[REG1800:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sr(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG1798]], <4 x i32> noundef [[REG1799]])
-// CHECK-NEXT: store <4 x i32> [[REG1800]], <4 x i32>* [[REG1801:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1802:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1796]], align 16
-// CHECK-NEXT: [[REG1803:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1801]], align 16
-// CHECK-NEXT: [[REG1804:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1796]], align 16
-// CHECK-NEXT: [[REG1805:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4])(<4 x i32> noundef [[REG1802]], <4 x i32> noundef [[REG1803]], <4 x i32> noundef [[REG1804]])
-// CHECK-NEXT: store <4 x i32> [[REG1805]], <4 x i32>* [[REG1801]], align 16
-// CHECK-NEXT: [[REG1806:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1801]], align 16
-// CHECK-NEXT: [[REG1807:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1806]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1807]]
-
-// CHECK: define available_externally <2 x i64> @_mm_srl_epi64(<2 x i64> noundef [[REG1808:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1809:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1808]], <2 x i64>* [[REG1810:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG1809]], <2 x i64>* [[REG1811:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> <i64 64, i64 64>, <2 x i64>* [[REG1812:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1813:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1811]], align 16
-// CHECK-NEXT: [[REG1814:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef [[REG1813]], i32 noundef zeroext 0)
-// CHECK-NEXT: store <2 x i64> [[REG1814]], <2 x i64>* [[REG1815:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1816:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1815]], align 16
-// CHECK-NEXT: [[REG1817:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef [[REG1816]], <2 x i64> noundef <i64 64, i64 64>)
-// CHECK-NEXT: store <2 x i64> [[REG1817]], <2 x i64>* [[REG1818:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1819:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1810]], align 16
-// CHECK-NEXT: [[REG1820:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1815]], align 16
-// CHECK-NEXT: [[REG1821:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_sr(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef [[REG1819]], <2 x i64> noundef [[REG1820]])
-// CHECK-NEXT: store <2 x i64> [[REG1821]], <2 x i64>* [[REG1822:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1823:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1818]], align 16
-// CHECK-NEXT: [[REG1824:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1823]] to <2 x double>
-// CHECK-NEXT: [[REG1825:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1822]], align 16
-// CHECK-NEXT: [[REG1826:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1825]] to <2 x double>
-// CHECK-NEXT: [[REG1827:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1818]], align 16
-// CHECK-NEXT: [[REG1828:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_sel(double vector[2], double vector[2], bool vector[2])(<2 x double> noundef [[REG1824]], <2 x double> noundef [[REG1826]], <2 x i64> noundef [[REG1827]])
-// CHECK-NEXT: [[REG1829:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG1828]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG1829]], <2 x i64>* [[REG1822]], align 16
-// CHECK-NEXT: [[REG1830:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1822]], align 16
-// CHECK-NEXT: ret <2 x i64> [[REG1830]]
-
-// CHECK: define available_externally <2 x i64> @_mm_srli_epi16(<2 x i64> noundef [[REG1831:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG1832:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1831]], <2 x i64>* [[REG1833:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG1832]], i32* [[REG1834:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store <8 x i16> zeroinitializer, <8 x i16>* [[REG1835:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1836:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1834]], align 4
-// CHECK-NEXT: [[REG1837:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1836]], 16
-// CHECK-NEXT: br i1 [[REG1837]], label %[[REG1838:[0-9a-zA-Z_%.]+]], label %[[REG1839:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG1838]]:
-// CHECK-NEXT: load
-// CHECK-NEXT: call i1 @llvm.is.constant
-// CHECK-NEXT: br i1 %[[REG1838a:[0-9a-zA-Z_%.]+]], label %[[REG1840:[0-9a-zA-Z_%.]+]], label %[[REG1841:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG1840]]:
-// CHECK-NEXT: [[REG1842:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1834]], align 4
-// CHECK-NEXT: [[REG1843:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG1842]] to i8
-// CHECK-NEXT: [[REG1844:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splat_s16(signed char)(i8 noundef signext [[REG1843]])
-// CHECK-NEXT: store <8 x i16> [[REG1844]], <8 x i16>* [[REG1845:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: br label %[[REG1846:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG1841]]:
-// CHECK-NEXT: [[REG1847:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1834]], align 4
-// CHECK-NEXT: [[REG1848:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG1847]] to i16
-// CHECK-NEXT: [[REG1849:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext [[REG1848]])
-// CHECK-NEXT: store <8 x i16> [[REG1849]], <8 x i16>* [[REG1845]], align 16
-// CHECK-NEXT: br label %[[REG1846:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG1846]]:
-// CHECK-NEXT: [[REG1850:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1833]], align 16
-// CHECK-NEXT: [[REG1851:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1850]] to <8 x i16>
-// CHECK-NEXT: [[REG1852:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1845]], align 16
-// CHECK-NEXT: [[REG1853:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sr(short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG1851]], <8 x i16> noundef [[REG1852]])
-// CHECK-NEXT: store <8 x i16> [[REG1853]], <8 x i16>* [[REG1835]], align 16
-// CHECK-NEXT: br label %[[REG1839:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG1839]]:
-// CHECK-NEXT: [[REG1854:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG1835]], align 16
-// CHECK-NEXT: [[REG1855:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG1854]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1855]]
-
-// CHECK: define available_externally <2 x i64> @_mm_srli_epi32(<2 x i64> noundef [[REG1856:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG1857:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1856]], <2 x i64>* [[REG1858:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG1857]], i32* [[REG1859:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[REG1860:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1861:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1859]], align 4
-// CHECK-NEXT: [[REG1862:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1861]], 32
-// CHECK-NEXT: br i1 [[REG1862]], label %[[REG1863:[0-9a-zA-Z_%.]+]], label %[[REG1864:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1863]]:
-// CHECK-NEXT: load
-// CHECK-NEXT: call i1 @llvm.is.constant
-// CHECK-NEXT: br i1 %[[REG1865a:[0-9a-zA-Z_%.]+]], label %[[REG1865:[0-9a-zA-Z_%.]+]], label %[[REG1866:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1865]]:
-// CHECK-NEXT: [[REG1867:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1859]], align 4
-// CHECK-NEXT: [[REG1868:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1867]], 16
-// CHECK-NEXT: br i1 [[REG1868]], label %[[REG1869:[0-9a-zA-Z_%.]+]], label %[[REG1870:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1869]]:
-// CHECK-NEXT: [[REG1871:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1859]], align 4
-// CHECK-NEXT: [[REG1872:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG1871]] to i8
-// CHECK-NEXT: [[REG1873:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext [[REG1872]])
-// CHECK-NEXT: store <4 x i32> [[REG1873]], <4 x i32>* [[REG1874:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: br label %[[REG1875:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1870]]:
-// CHECK-NEXT: [[REG1876:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1859]], align 4
-// CHECK-NEXT: [[REG1877:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext [[REG1876]])
-// CHECK-NEXT: store <4 x i32> [[REG1877]], <4 x i32>* [[REG1874]], align 16
-// CHECK-NEXT: br label %[[REG1875:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1875]]:
-// CHECK-NEXT: br label %[[REG1878:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1866]]:
-// CHECK-NEXT: [[REG1879:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1859]], align 4
-// CHECK-NEXT: [[REG1880:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext [[REG1879]])
-// CHECK-NEXT: store <4 x i32> [[REG1880]], <4 x i32>* [[REG1874]], align 16
-// CHECK-NEXT: br label %[[REG1878:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1878]]:
-// CHECK-NEXT: [[REG1881:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1858]], align 16
-// CHECK-NEXT: [[REG1882:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1881]] to <4 x i32>
-// CHECK-NEXT: [[REG1883:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1874]], align 16
-// CHECK-NEXT: [[REG1884:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG1882]], <4 x i32> noundef [[REG1883]])
-// CHECK-NEXT: store <4 x i32> [[REG1884]], <4 x i32>* [[REG1860]], align 16
-// CHECK-NEXT: br label %[[REG1864:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1864]]:
-// CHECK-NEXT: [[REG1885:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG1860]], align 16
-// CHECK-NEXT: [[REG1886:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1885]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG1886]]
-
-// CHECK: define available_externally <2 x i64> @_mm_srli_epi64(<2 x i64> noundef [[REG1887:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG1888:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1887]], <2 x i64>* [[REG1889:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG1888]], i32* [[REG1890:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[REG1891:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1892:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1890]], align 4
-// CHECK-NEXT: [[REG1893:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1892]], 64
-// CHECK-NEXT: br i1 [[REG1893]], label %[[REG1894:[0-9a-zA-Z_%.]+]], label %[[REG1895:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1894]]:
-// CHECK-NEXT: load
-// CHECK-NEXT: call i1 @llvm.is.constant
-// CHECK-NEXT: br i1 %[[REG1896a:[0-9a-zA-Z_%.]+]], label %[[REG1896:[0-9a-zA-Z_%.]+]], label %[[REG1897:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1896]]:
-// CHECK-NEXT: [[REG1898:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1890]], align 4
-// CHECK-NEXT: [[REG1899:[0-9a-zA-Z_%.]+]] = icmp slt i32 [[REG1898]], 16
-// CHECK-NEXT: br i1 [[REG1899]], label %[[REG1900:[0-9a-zA-Z_%.]+]], label %[[REG1901:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1900]]:
-// CHECK-NEXT: [[REG1902:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1890]], align 4
-// CHECK-NEXT: [[REG1903:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG1902]] to i8
-// CHECK-NEXT: [[REG1904:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext [[REG1903]])
-// CHECK-NEXT: [[REG1905:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1904]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG1905]], <2 x i64>* [[REG1906:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: br label %[[REG1907:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1901]]:
-// CHECK-NEXT: [[REG1908:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1890]], align 4
-// CHECK-NEXT: [[REG1909:[0-9a-zA-Z_%.]+]] = sext i32 [[REG1908]] to i64
-// CHECK-NEXT: [[REG1910:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG1909]])
-// CHECK-NEXT: store <2 x i64> [[REG1910]], <2 x i64>* [[REG1906]], align 16
-// CHECK-NEXT: br label %[[REG1907:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1907]]:
-// CHECK-NEXT: br label %[[REG1911:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1897]]:
-// CHECK-NEXT: [[REG1912:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG1890]], align 4
-// CHECK-NEXT: [[REG1913:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext [[REG1912]])
-// CHECK-NEXT: [[REG1914:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG1913]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG1914]], <2 x i64>* [[REG1906]], align 16
-// CHECK-NEXT: br label %[[REG1911:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1911]]:
-// CHECK-NEXT: [[REG1915:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1889]], align 16
-// CHECK-NEXT: [[REG1916:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1906]], align 16
-// CHECK-NEXT: [[REG1917:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_sr(long long vector[2], unsigned long long vector[2])(<2 x i64> noundef [[REG1915]], <2 x i64> noundef [[REG1916]])
-// CHECK-NEXT: store <2 x i64> [[REG1917]], <2 x i64>* [[REG1891]], align 16
-// CHECK-NEXT: br label %[[REG1895:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG1895]]:
-// CHECK-NEXT: [[REG1918:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1891]], align 16
-// CHECK-NEXT: ret <2 x i64> [[REG1918]]
-
-// CHECK: define available_externally <2 x i64> @_mm_srli_si128
-// CHECK: [[REG1919:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_bsrli_si128
-// CHECK-NEXT: ret <2 x i64> [[REG1919]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi16
+// CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, <8 x i16>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3)
+// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+// CHECK: call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8])
+// CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi32
+// CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1)
+// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 32, i32 32, i32 32, i32 32>)
+// CHECK: call <4 x i32> @vec_sr(unsigned int vector[4], unsigned int vector[4])
+// CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi64
+// CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef <i64 64, i64 64>)
+// CHECK: call <2 x i64> @vec_sr(unsigned long long vector[2], unsigned long long vector[2])
+// CHECK: call <2 x double> @vec_sel(double vector[2], double vector[2], bool vector[2])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_epi16
+// CHECK: store <8 x i16> zeroinitializer, <8 x i16>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
+// CHECK: br i1 %[[CMP]]
+// CHECK: call i1 @llvm.is.constant
+// CHECK: trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
+// CHECK: call <8 x i16> @vec_splat_s16(signed char)
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i16
+// CHECK: call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext %[[TRUNC]])
+// CHECK: call <8 x i16> @vec_sr(short vector[8], unsigned short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_epi32
+// CHECK: store <4 x i32> zeroinitializer, <4 x i32>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32
+// CHECK: br i1 %[[CMP]]
+// CHECK: call i1 @llvm.is.constant
+// CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
+// CHECK: br i1 %[[CMP2]]
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
+// CHECK: call <4 x i32> @vec_splat_s32(signed char)
+// CHECK: call <4 x i32> @vec_splats(unsigned int)
+// CHECK: call <4 x i32> @vec_splats(unsigned int)
+// CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_epi64
+// CHECK: store <2 x i64> zeroinitializer, <2 x i64>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 64
+// CHECK: br i1 %[[CMP]]
+// CHECK: call i1 @llvm.is.constant
+// CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
+// CHECK: br i1 %[[CMP2]]
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
+// CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]])
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)(i64 noundef %[[EXT]])
+// CHECK: call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <2 x i64> @vec_sr(long long vector[2], unsigned long long vector[2])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_si128
+// CHECK: call <2 x i64> @_mm_bsrli_si128
 
 void __attribute__((noinline))
 test_store() {
@@ -2908,108 +1292,57 @@ test_store() {
 
 // CHECK-LABEL: @test_store
 
-// CHECK: define available_externally void @_mm_store_pd(double* noundef [[REG1920:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1921:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG1920]], double** [[REG1922:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x double> [[REG1921]], <2 x double>* [[REG1923:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1924:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1923]], align 16
-// CHECK-NEXT: [[REG1925:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG1924]] to <16 x i8>
-// CHECK-NEXT: [[REG1926:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG1922]], align 8
-// CHECK-NEXT: [[REG1927:[0-9a-zA-Z_%.]+]] = bitcast double* [[REG1926]] to <16 x i8>*
-// CHECK-NEXT: call void @vec_st(unsigned char vector[16], long, unsigned char vector[16]*)(<16 x i8> noundef [[REG1925]], i64 noundef 0, <16 x i8>* noundef [[REG1927]])
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_store_pd1(double* noundef [[REG1928:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1929:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG1928]], double** [[REG1930:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x double> [[REG1929]], <2 x double>* [[REG1931:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1932:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG1930]], align 8
-// CHECK-NEXT: [[REG1933:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1931]], align 16
-// CHECK-NEXT: call void @_mm_store1_pd(double* noundef [[REG1932]], <2 x double> noundef [[REG1933]])
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_store_sd(double* noundef [[REG1934:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1935:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG1934]], double** [[REG1936:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x double> [[REG1935]], <2 x double>* [[REG1937:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1938:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1937]], align 16
-// CHECK-NEXT: [[REG1939:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1938]], i32 0
-// CHECK-NEXT: [[REG1940:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG1936]], align 8
-// CHECK-NEXT: store double [[REG1939]], double* [[REG1940]], align 8
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_store_si128(<2 x i64>* noundef [[REG1941:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1942:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64>* [[REG1941]], <2 x i64>** [[REG1943:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x i64> [[REG1942]], <2 x i64>* [[REG1944:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1945:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1944]], align 16
-// CHECK-NEXT: [[REG1946:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1945]] to <16 x i8>
-// CHECK-NEXT: [[REG1947:[0-9a-zA-Z_%.]+]] = load <2 x i64>*, <2 x i64>** [[REG1943]], align 8
-// CHECK-NEXT: [[REG1948:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64>* [[REG1947]] to <16 x i8>*
-// CHECK-NEXT: call void @vec_st(unsigned char vector[16], long, unsigned char vector[16]*)(<16 x i8> noundef [[REG1946]], i64 noundef 0, <16 x i8>* noundef [[REG1948]])
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_store1_pd(double* noundef [[REG1949:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1950:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG1949]], double** [[REG1951:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x double> [[REG1950]], <2 x double>* [[REG1952:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1953:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG1951]], align 8
-// CHECK-NEXT: [[REG1954:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1952]], align 16
-// CHECK-NEXT: [[REG1955:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_splat(double vector[2], unsigned int)(<2 x double> noundef [[REG1954]], i32 noundef zeroext 0)
-// CHECK-NEXT: call void @_mm_store_pd(double* noundef [[REG1953]], <2 x double> noundef [[REG1955]])
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_storeh_pd(double* noundef [[REG1956:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1957:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG1956]], double** [[REG1958:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x double> [[REG1957]], <2 x double>* [[REG1959:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1960:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1959]], align 16
-// CHECK-NEXT: [[REG1961:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG1960]], i32 1
-// CHECK-NEXT: [[REG1962:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG1958]], align 8
-// CHECK-NEXT: store double [[REG1961]], double* [[REG1962]], align 8
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_storel_epi64(<2 x i64>* noundef [[REG1963:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1964:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64>* [[REG1963]], <2 x i64>** [[REG1965:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x i64> [[REG1964]], <2 x i64>* [[REG1966:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1967:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1966]], align 16
-// CHECK-NEXT: [[REG1968:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG1967]], i32 0
-// CHECK-NEXT: [[REG1969:[0-9a-zA-Z_%.]+]] = load <2 x i64>*, <2 x i64>** [[REG1965]], align 8
-// CHECK-NEXT: [[REG1970:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64>* [[REG1969]] to i64*
-// CHECK-NEXT: store i64 [[REG1968]], i64* [[REG1970]], align 8
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_storel_pd(double* noundef [[REG1971:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1972:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG1971]], double** [[REG1973:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x double> [[REG1972]], <2 x double>* [[REG1974:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1975:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG1973]], align 8
-// CHECK-NEXT: [[REG1976:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1974]], align 16
-// CHECK-NEXT: call void @_mm_store_sd(double* noundef [[REG1975]], <2 x double> noundef [[REG1976]])
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_storer_pd(double* noundef [[REG1977:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1978:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG1977]], double** [[REG1979:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x double> [[REG1978]], <2 x double>* [[REG1980:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1981:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG1979]], align 8
-// CHECK-NEXT: [[REG1982:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1980]], align 16
-// CHECK-NEXT: [[REG1983:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1980]], align 16
-// CHECK-NEXT: [[REG1984:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG1982]] to <2 x i64>
-// CHECK-NEXT: [[REG1985:[0-9a-zA-Z_%.]+]] = bitcast <2 x double> [[REG1983]] to <2 x i64>
-// CHECK-NEXT: [[REG1986:[0-9a-zA-Z_%.]+]] = shufflevector <2 x i64> [[REG1984]], <2 x i64> [[REG1985]], <2 x i32> <i32 1, i32 2>
-// CHECK-NEXT: [[REG1987:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1986]] to <2 x double>
-// CHECK-NEXT: call void @_mm_store_pd(double* noundef [[REG1981]], <2 x double> noundef [[REG1987]])
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_storeu_pd(double* noundef [[REG1988:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG1989:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG1988]], double** [[REG1990:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x double> [[REG1989]], <2 x double>* [[REG1991:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1992:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG1991]], align 16
-// CHECK-NEXT: [[REG1993:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG1990]], align 8
-// CHECK-NEXT: [[REG1994:[0-9a-zA-Z_%.]+]] = bitcast double* [[REG1993]] to <2 x double>*
-// CHECK-NEXT: store <2 x double> [[REG1992]], <2 x double>* [[REG1994]], align 1
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_storeu_si128(<2 x i64>* noundef [[REG1995:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG1996:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64>* [[REG1995]], <2 x i64>** [[REG1997:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x i64> [[REG1996]], <2 x i64>* [[REG1998:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1999:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG1998]], align 16
-// CHECK-NEXT: [[REG2000:[0-9a-zA-Z_%.]+]] = load <2 x i64>*, <2 x i64>** [[REG1997]], align 8
-// CHECK-NEXT: store <2 x i64> [[REG1999]], <2 x i64>* [[REG2000]], align 1
-// CHECK-NEXT: ret void
+// CHECK-LABEL: define available_externally void @_mm_store_pd
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load double*, double** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast double* %[[ADDR]] to <16 x i8>*
+// CHECK: call void @vec_st(unsigned char vector[16], long, unsigned char vector[16]*)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef 0, <16 x i8>* noundef %[[CAST]])
+
+// CHECK-LABEL: define available_externally void @_mm_store_pd1
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load double*, double** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[ADDR2:[0-9a-zA-Z_.]+]] = load <2 x double>, <2 x double>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call void @_mm_store1_pd(double* noundef %[[ADDR]], <2 x double> noundef %[[ADDR2]])
+
+// CHECK-LABEL: define available_externally void @_mm_store_sd
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load double*, double** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: store double %{{[0-9a-zA-Z_.]+}}, double* %[[ADDR]], align 8
+
+// CHECK-LABEL: define available_externally void @_mm_store_si128
+// CHECK: %[[LOAD:[0-9a-zA-Z_.]+]] = load <2 x i64>*, <2 x i64>** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <2 x i64>* %[[LOAD]] to <16 x i8>*
+// CHECK: call void @vec_st(unsigned char vector[16], long, unsigned char vector[16]*)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef 0, <16 x i8>* noundef %[[CAST]])
+
+// CHECK-LABEL: define available_externally void @_mm_store1_pd
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load double*, double** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x double>, <2 x double>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: %[[CALL:[0-9a-zA-Z_.]+]] = call <2 x double> @vec_splat(double vector[2], unsigned int)(<2 x double> noundef %[[VAL]], i32 noundef zeroext 0)
+// CHECK: call void @_mm_store_pd(double* noundef %[[ADDR]], <2 x double> noundef %[[CALL]])
+
+// CHECK-LABEL: define available_externally void @_mm_storeh_pd
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load double*, double** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: store double %{{[0-9a-zA-Z_.]+}}, double* %[[ADDR]], align 8
+
+// CHECK-LABEL: define available_externally void @_mm_storel_epi64
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load <2 x i64>*, <2 x i64>** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <2 x i64>* %[[ADDR]] to i64*
+// CHECK: store i64 %{{[0-9a-zA-Z_.]+}}, i64* %[[CAST]], align 8
+
+// CHECK-LABEL: define available_externally void @_mm_storel_pd
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load double*, double** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x double>, <2 x double>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call void @_mm_store_sd(double* noundef %[[ADDR]], <2 x double> noundef %[[VAL]])
+
+// CHECK-LABEL: define available_externally void @_mm_storer_pd
+// CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 1, i32 2>
+// CHECK: call void @_mm_store_pd(double* noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally void @_mm_storeu_pd
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load double*, double** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast double* %[[ADDR]] to <2 x double>*
+// CHECK: store <2 x double> %{{[0-9a-zA-Z_.]+}}, <2 x double>* %[[CAST]], align 1
+
+// CHECK-LABEL: define available_externally void @_mm_storeu_si128
+// CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load <2 x i64>*, <2 x i64>** %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: store <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64>* %[[ADDR]], align 1
 
 void __attribute__((noinline))
 test_stream() {
@@ -3021,46 +1354,17 @@ test_stream() {
 
 // CHECK-LABEL: @test_stream
 
-// CHECK: define available_externally void @_mm_stream_pd(double* noundef [[REG2001:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG2002:[0-9a-zA-Z_%.]+]])
-// CHECK: store double* [[REG2001]], double** [[REG2003:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x double> [[REG2002]], <2 x double>* [[REG2004:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG2005:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG2003]], align 8
-// CHECK-NEXT: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(double* [[REG2005]])
-// CHECK-NEXT: [[REG2006:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG2004]], align 16
-// CHECK-NEXT: [[REG2007:[0-9a-zA-Z_%.]+]] = load double*, double** [[REG2003]], align 8
-// CHECK-NEXT: [[REG2008:[0-9a-zA-Z_%.]+]] = bitcast double* [[REG2007]] to <2 x double>*
-// CHECK-NEXT: store <2 x double> [[REG2006]], <2 x double>* [[REG2008]], align 16
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_stream_si128(<2 x i64>* noundef [[REG2009:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG2010:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64>* [[REG2009]], <2 x i64>** [[REG2011:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <2 x i64> [[REG2010]], <2 x i64>* [[REG2012:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG2013:[0-9a-zA-Z_%.]+]] = load <2 x i64>*, <2 x i64>** [[REG2011]], align 8
-// CHECK-NEXT: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(<2 x i64>* [[REG2013]])
-// CHECK-NEXT: [[REG2014:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG2012]], align 16
-// CHECK-NEXT: [[REG2015:[0-9a-zA-Z_%.]+]] = load <2 x i64>*, <2 x i64>** [[REG2011]], align 8
-// CHECK-NEXT: store <2 x i64> [[REG2014]], <2 x i64>* [[REG2015]], align 16
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_stream_si32(i32* noundef [[REG2016:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG2017:[0-9a-zA-Z_%.]+]])
-// CHECK: store i32* [[REG2016]], i32** [[REG2018:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i32 [[REG2017]], i32* [[REG2019:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG2020:[0-9a-zA-Z_%.]+]] = load i32*, i32** [[REG2018]], align 8
-// CHECK-NEXT: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(i32* [[REG2020]])
-// CHECK-NEXT: [[REG2021:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG2019]], align 4
-// CHECK-NEXT: [[REG2022:[0-9a-zA-Z_%.]+]] = load i32*, i32** [[REG2018]], align 8
-// CHECK-NEXT: store i32 [[REG2021]], i32* [[REG2022]], align 4
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_stream_si64(i64* noundef [[REG2023:[0-9a-zA-Z_%.]+]], i64 noundef [[REG2024:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64* [[REG2023]], i64** [[REG2025:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG2024]], i64* [[REG2026:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG2027:[0-9a-zA-Z_%.]+]] = load i64*, i64** [[REG2025]], align 8
-// CHECK-NEXT: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(i64* [[REG2027]])
-// CHECK-NEXT: [[REG2028:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG2026]], align 8
-// CHECK-NEXT: [[REG2029:[0-9a-zA-Z_%.]+]] = load i64*, i64** [[REG2025]], align 8
-// CHECK-NEXT: store i64 [[REG2028]], i64* [[REG2029]], align 8
-// CHECK-NEXT: ret void
+// CHECK-LABEL: define available_externally void @_mm_stream_pd
+// CHECK: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(double* %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally void @_mm_stream_si128
+// CHECK: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(<2 x i64>* %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally void @_mm_stream_si32
+// CHECK: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(i32* %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally void @_mm_stream_si64
+// CHECK: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(i64* %{{[0-9a-zA-Z_.]+}})
 
 void __attribute__((noinline))
 test_sub() {
@@ -3079,66 +1383,38 @@ test_sub() {
 
 // CHECK-LABEL: @test_sub
 
-// CHECK: define available_externally <2 x i64> @_mm_sub_epi64
-// CHECK: [[REG2030:[0-9a-zA-Z_%.]+]] = sub <2 x i64> {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: ret <2 x i64> [[REG2030]]
-
-// CHECK: define available_externally <2 x i64> @_mm_sub_epi32
-// CHECK: [[REG2031:[0-9a-zA-Z_%.]+]] = sub <4 x i32> {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG2032:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG2031]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG2032]]
-
-// CHECK: define available_externally <2 x i64> @_mm_sub_epi16
-// CHECK: [[REG2033:[0-9a-zA-Z_%.]+]] = sub <8 x i16> {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG2034:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG2033]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG2034]]
-
-// CHECK: define available_externally <2 x i64> @_mm_sub_epi8
-// CHECK: [[REG2035:[0-9a-zA-Z_%.]+]] = sub <16 x i8> {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG2036:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG2035]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG2036]]
-
-// CHECK: define available_externally <2 x double> @_mm_sub_pd
-// CHECK: [[REG2037:[0-9a-zA-Z_%.]+]] = fsub <2 x double> {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: ret <2 x double> [[REG2037]]
-
-// CHECK: define available_externally <2 x double> @_mm_sub_sd(<2 x double> noundef [[REG2038:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG2039:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG2038]], <2 x double>* [[REG2040:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG2039]], <2 x double>* [[REG2041:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG2042:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG2040]], align 16
-// CHECK-NEXT: [[REG2043:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG2042]], i32 0
-// CHECK-NEXT: [[REG2044:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG2041]], align 16
-// CHECK-NEXT: [[REG2045:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG2044]], i32 0
-// CHECK-NEXT: [[REG2046:[0-9a-zA-Z_%.]+]] = fsub double [[REG2043]], [[REG2045]]
-// CHECK-NEXT: [[REG2047:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG2040]], align 16
-// CHECK-NEXT: [[REG2048:[0-9a-zA-Z_%.]+]] = insertelement <2 x double> [[REG2047]], double [[REG2046]], i32 0
-// CHECK-NEXT: store <2 x double> [[REG2048]], <2 x double>* [[REG2040]], align 16
-// CHECK-NEXT: [[REG2049:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG2040]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG2049]]
-
-// CHECK: define available_externally i64 @_mm_sub_si64
-// CHECK: [[REG2050:[0-9a-zA-Z_%.]+]] = sub i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: ret i64 [[REG2050]]
-
-// CHECK: define available_externally <2 x i64> @_mm_subs_epi16
-// CHECK: [[REG2051:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_subs(short vector[8], short vector[8])
-// CHECK-NEXT: [[REG2052:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG2051]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG2052]]
-
-// CHECK: define available_externally <2 x i64> @_mm_subs_epi8
-// CHECK: [[REG2053:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_subs(signed char vector[16], signed char vector[16])
-// CHECK-NEXT: [[REG2054:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG2053]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG2054]]
-
-// CHECK: define available_externally <2 x i64> @_mm_subs_epu16
-// CHECK: [[REG2055:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_subs(unsigned short vector[8], unsigned short vector[8])
-// CHECK-NEXT: [[REG2056:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG2055]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG2056]]
-
-// CHECK: define available_externally <2 x i64> @_mm_subs_epu8
-// CHECK: [[REG2057:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_subs(unsigned char vector[16], unsigned char vector[16])
-// CHECK-NEXT: [[REG2058:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG2057]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG2058]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi64
+// CHECK: sub <2 x i64>
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi32
+// CHECK: sub <4 x i32>
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi16
+// CHECK: sub <8 x i16>
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi8
+// CHECK: sub <16 x i8>
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_sub_pd
+// CHECK: fsub <2 x double>
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_sub_sd
+// CHECK: fsub double
+
+// CHECK-LABEL: define available_externally i64 @_mm_sub_si64
+// CHECK: sub i64
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epi16
+// CHECK: call <8 x i16> @vec_subs(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epi8
+// CHECK: call <16 x i8> @vec_subs(signed char vector[16], signed char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epu16
+// CHECK: call <8 x i16> @vec_subs(unsigned short vector[8], unsigned short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epu8
+// CHECK: call <16 x i8> @vec_subs(unsigned char vector[16], unsigned char vector[16])
 
 void __attribute__((noinline))
 test_ucomi() {
@@ -3152,41 +1428,23 @@ test_ucomi() {
 
 // CHECK-LABEL: @test_ucomi
 
-// CHECK: define available_externally signext i32 @_mm_ucomieq_sd(<2 x double> noundef [[REG2059:[0-9a-zA-Z_%.]+]], <2 x double> noundef [[REG2060:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x double> [[REG2059]], <2 x double>* [[REG2061:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG2060]], <2 x double>* [[REG2062:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG2063:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG2061]], align 16
-// CHECK-NEXT: [[REG2064:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG2063]], i32 0
-// CHECK-NEXT: [[REG2065:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG2062]], align 16
-// CHECK-NEXT: [[REG2066:[0-9a-zA-Z_%.]+]] = extractelement <2 x double> [[REG2065]], i32 0
-// CHECK-NEXT: [[REG2067:[0-9a-zA-Z_%.]+]] = fcmp oeq double [[REG2064]], [[REG2066]]
-// CHECK-NEXT: [[REG2068:[0-9a-zA-Z_%.]+]] = zext i1 [[REG2067]] to i32
-// CHECK-NEXT: ret i32 [[REG2068]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomige_sd
-// CHECK: [[REG2069:[0-9a-zA-Z_%.]+]] = fcmp oge double {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG2070:[0-9a-zA-Z_%.]+]] = zext i1 [[REG2069]] to i32
-// CHECK-NEXT: ret i32 [[REG2070]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomigt_sd
-// CHECK: [[REG2071:[0-9a-zA-Z_%.]+]] = fcmp ogt double {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG2072:[0-9a-zA-Z_%.]+]] = zext i1 [[REG2071]] to i32
-// CHECK-NEXT: ret i32 [[REG2072]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomile_sd
-// CHECK: [[REG2073:[0-9a-zA-Z_%.]+]] = fcmp ole double {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG2074:[0-9a-zA-Z_%.]+]] = zext i1 [[REG2073]] to i32
-// CHECK-NEXT: ret i32 [[REG2074]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomilt_sd
-// CHECK: [[REG2075:[0-9a-zA-Z_%.]+]] = fcmp olt double {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG2076:[0-9a-zA-Z_%.]+]] = zext i1 [[REG2075]] to i32
-// CHECK-NEXT: ret i32 [[REG2076]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomineq_sd
-// CHECK: [[REG2077:[0-9a-zA-Z_%.]+]] = fcmp une double {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG2078:[0-9a-zA-Z_%.]+]] = zext i1 [[REG2077]] to i32
-// CHECK-NEXT: ret i32 [[REG2078]]
+// CHECK-LABEL: define available_externally signext i32 @_mm_ucomieq_sd
+// CHECK: fcmp oeq double
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_ucomige_sd
+// CHECK: fcmp oge double
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_ucomigt_sd
+// CHECK: fcmp ogt double
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_ucomile_sd
+// CHECK: fcmp ole double
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_ucomilt_sd
+// CHECK: fcmp olt double
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_ucomineq_sd
+// CHECK: fcmp une double
 
 void __attribute__((noinline))
 test_undefined() {
@@ -3196,17 +1454,15 @@ test_undefined() {
 
 // CHECK-LABEL: @test_undefined
 
-// CHECK: define available_externally <2 x double> @_mm_undefined_pd()
-// CHECK: [[REG2079:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG2080:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x double> [[REG2079]], <2 x double>* [[REG2080]], align 16
-// CHECK-NEXT: [[REG2081:[0-9a-zA-Z_%.]+]] = load <2 x double>, <2 x double>* [[REG2080]], align 16
-// CHECK-NEXT: ret <2 x double> [[REG2081]]
+// CHECK-LABEL: define available_externally <2 x double> @_mm_undefined_pd()
+// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x double>, <2 x double>* %[[ADDR:[0-9a-zA-Z_.]+]], align 16
+// CHECK: store <2 x double> %[[VAL]], <2 x double>* %[[ADDR]], align 16
+// CHECK: load <2 x double>, <2 x double>* %[[ADDR]], align 16
 
-// CHECK: define available_externally <2 x i64> @_mm_undefined_si128()
-// CHECK: [[REG2082:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG2083:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG2082]], <2 x i64>* [[REG2083]], align 16
-// CHECK-NEXT: [[REG2084:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG2083]], align 16
-// CHECK-NEXT: ret <2 x i64> [[REG2084]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_undefined_si128()
+// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x i64>, <2 x i64>* %[[ADDR:[0-9a-zA-Z_.]+]], align 16
+// CHECK: store <2 x i64> %[[VAL]], <2 x i64>* %[[ADDR]], align 16
+// CHECK: load <2 x i64>, <2 x i64>* %[[ADDR]], align 16
 
 void __attribute__((noinline))
 test_unpack() {
@@ -3224,48 +1480,32 @@ test_unpack() {
 
 // CHECK-LABEL: @test_unpack
 
-// CHECK: define available_externally <2 x i64> @_mm_unpackhi_epi16
-// CHECK: [[REG2085:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])
-// CHECK-NEXT: [[REG2086:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG2085]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG2086]]
-
-// CHECK: define available_externally <2 x i64> @_mm_unpackhi_epi32
-// CHECK: [[REG2087:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mergel(unsigned int vector[4], unsigned int vector[4])
-// CHECK-NEXT: [[REG2088:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG2087]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG2088]]
-
-// CHECK: define available_externally <2 x i64> @_mm_unpackhi_epi64
-// CHECK: [[REG2089:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
-// CHECK-NEXT: ret <2 x i64> [[REG2089]]
-
-// CHECK: define available_externally <2 x i64> @_mm_unpackhi_epi8
-// CHECK: [[REG2090:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])
-// CHECK-NEXT: [[REG2091:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG2090]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG2091]]
-
-// CHECK: define available_externally <2 x double> @_mm_unpackhi_pd
-// CHECK: [[REG2092:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergel(double vector[2], double vector[2])
-// CHECK-NEXT: ret <2 x double> [[REG2092]]
-
-// CHECK: define available_externally <2 x i64> @_mm_unpacklo_epi16
-// CHECK: [[REG2093:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergeh(short vector[8], short vector[8])
-// CHECK-NEXT: [[REG2094:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG2093]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG2094]]
-
-// CHECK: define available_externally <2 x i64> @_mm_unpacklo_epi32
-// CHECK: [[REG2095:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mergeh(int vector[4], int vector[4])
-// CHECK-NEXT: [[REG2096:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG2095]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG2096]]
-
-// CHECK: define available_externally <2 x i64> @_mm_unpacklo_epi64
-// CHECK: [[REG2097:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
-// CHECK-NEXT: ret <2 x i64> [[REG2097]]
-
-// CHECK: define available_externally <2 x i64> @_mm_unpacklo_epi8
-// CHECK: [[REG2098:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergeh(unsigned char vector[16], unsigned char vector[16])
-// CHECK-NEXT: [[REG2099:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG2098]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG2099]]
-
-// CHECK: define available_externally <2 x double> @_mm_unpacklo_pd
-// CHECK: [[REG2100:[0-9a-zA-Z_%.]+]] = call <2 x double> @vec_mergeh(double vector[2], double vector[2])
-// CHECK-NEXT: ret <2 x double> [[REG2100]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi16
+// CHECK: call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi32
+// CHECK: call <4 x i32> @vec_mergel(unsigned int vector[4], unsigned int vector[4])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi64
+// CHECK: call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi8
+// CHECK: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_unpackhi_pd
+// CHECK: call <2 x double> @vec_mergel(double vector[2], double vector[2])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi16
+// CHECK: call <8 x i16> @vec_mergeh(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi32
+// CHECK: call <4 x i32> @vec_mergeh(int vector[4], int vector[4])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi64
+// CHECK: call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi8
+// CHECK: call <16 x i8> @vec_mergeh(unsigned char vector[16], unsigned char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x double> @_mm_unpacklo_pd
+// CHECK: call <2 x double> @vec_mergeh(double vector[2], double vector[2])

diff  --git a/clang/test/CodeGen/PowerPC/ppc-mmintrin.c b/clang/test/CodeGen/PowerPC/ppc-mmintrin.c
index 938265069a4eb..c4cf9d9f33f05 100644
--- a/clang/test/CodeGen/PowerPC/ppc-mmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-mmintrin.c
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: powerpc-registered-target
 
 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
@@ -41,100 +40,42 @@ test_add() {
 
 // CHECK-LABEL: @test_add
 
-// CHECK: define available_externally i64 @_mm_add_pi32
-
-// CHECK-P9: [[REG1:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-P9-NEXT: [[REG2:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1]] to <4 x i32>
-// CHECK-P9-NEXT: store <4 x i32> [[REG2]], <4 x i32>* [[REG3:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-P9-NEXT: [[REG4:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-P9-NEXT: [[REG5:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-P9-NEXT: [[REG6:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG5]] to <4 x i32>
-// CHECK-P9-NEXT: store <4 x i32> [[REG6]], <4 x i32>* [[REG7:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-P9-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG3]], align 16
-// CHECK-P9-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG7]], align 16
-// CHECK-P9-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_add(int vector[4], int vector[4])(<4 x i32> noundef [[REG8]], <4 x i32> noundef [[REG9]])
-// CHECK-P9-NEXT: store <4 x i32> [[REG10]], <4 x i32>* [[REG11:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-P9-NEXT: [[REG12:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG11]], align 16
-// CHECK-P9-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> %6 to <2 x i64>
-// CHECK-P9-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG13]], i32 0
-// CHECK-P9-NEXT: ret i64 [[REG14]]
-
-// CHECK-P8: [[REG15:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-P8-NEXT: [[REG16:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG15]]
-// CHECK-P8: [[REG17:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-P8-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG17]]
-// CHECK-P8-NEXT: add nsw i32 [[REG16]], [[REG18]]
-// CHECK-P8: [[REG19:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-P8-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG19]]
-// CHECK-P8: [[REG21:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-P8-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG21]]
-// CHECK-P8-NEXT: add nsw i32 [[REG20]], [[REG22]]
-
-// CHECK: define available_externally i64 @_mm_add_pi16
-// CHECK: [[REG23:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
-// CHECK-NEXT: [[REG24:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG23]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG24]], <8 x i16>* [[REG25:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG26:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
-// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG26]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG27]], <8 x i16>* [[REG28:[0-9a-zA-Z_%.]+]]
-// CHECK-NEXT: [[REG29:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG25]], align 16
-// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG28]], align 16
-// CHECK-NEXT: call <8 x i16> @vec_add(short vector[8], short vector[8])(<8 x i16> noundef [[REG29]], <8 x i16> noundef [[REG30]])
-
-// CHECK: define available_externally i64 @_mm_add_pi8
-// CHECK: [[REG31:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
-// CHECK-NEXT: [[REG32:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG31]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG32]], <16 x i8>* [[REG33:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG34:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
-// CHECK-NEXT: [[REG35:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG34]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG35]], <16 x i8>* [[REG36:[0-9a-zA-Z_%.]+]]
-// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG33]], align 16
-// CHECK-NEXT: [[REG38:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG36]], align 16
-// CHECK-NEXT: call <16 x i8> @vec_add(signed char vector[16], signed char vector[16])(<16 x i8> noundef [[REG37]], <16 x i8> noundef [[REG38]])
-
-// CHECK: define available_externally i64 @_mm_adds_pu16
-// CHECK: [[REG39:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
-// CHECK-NEXT: [[REG40:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG39]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG40]], <8 x i16>* [[REG41:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG42:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
-// CHECK-NEXT: [[REG43:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG42]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG43]], <8 x i16>* [[REG44:[0-9a-zA-Z_%.]+]]
-// CHECK-NEXT: [[REG45:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG41]], align 16
-// CHECK-NEXT: [[REG46:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG44]], align 16
-// CHECK-NEXT: call <8 x i16> @vec_adds(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG45]], <8 x i16> noundef [[REG46]])
-
-// CHECK: define available_externally i64 @_mm_adds_pu8
-// CHECK: [[REG47:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
-// CHECK-NEXT: [[REG48:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG47]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG48]], <16 x i8>* [[REG49:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG50:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
-// CHECK-NEXT: [[REG51:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG50]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG51]], <16 x i8>* [[REG52:[0-9a-zA-Z_%.]+]]
-// CHECK-NEXT: [[REG53:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG49]], align 16
-// CHECK-NEXT: [[REG54:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG52]], align 16
-// CHECK-NEXT: call <16 x i8> @vec_adds(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG53]], <16 x i8> noundef [[REG54]])
-
-// CHECK: define available_externally i64 @_mm_adds_pi16
-// CHECK: [[REG55:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
-// CHECK-NEXT: [[REG56:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG55]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG56]], <8 x i16>* [[REG57:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG58:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
-// CHECK-NEXT: [[REG59:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG58]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG59]], <8 x i16>* [[REG60:[0-9a-zA-Z_%.]+]]
-// CHECK-NEXT: [[REG61:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG57]], align 16
-// CHECK-NEXT: [[REG62:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG60]], align 16
-// CHECK-NEXT: call <8 x i16> @vec_adds(short vector[8], short vector[8])(<8 x i16> noundef [[REG61]], <8 x i16> noundef [[REG62]])
-
-// CHECK: define available_externally i64 @_mm_adds_pi8
-// CHECK: [[REG63:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
-// CHECK-NEXT: [[REG64:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG63]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG64]], <16 x i8>* [[REG65:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG66:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
-// CHECK-NEXT: [[REG67:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG66]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG67]], <16 x i8>* [[REG68:[0-9a-zA-Z_%.]+]]
-// CHECK-NEXT: [[REG69:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG65]], align 16
-// CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG68]], align 16
-// CHECK-NEXT: call <16 x i8> @vec_adds(signed char vector[16], signed char vector[16])(<16 x i8> noundef [[REG69]], <16 x i8> noundef [[REG70]])
+// CHECK-LABEL: define available_externally i64 @_mm_add_pi32
+// CHECK-P9: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9: call <4 x i32> @vec_add(int vector[4], int vector[4])
+// CHECK-P8: add nsw i32 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
+// CHECK-P8: add nsw i32 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
+
+// CHECK-LABEL: define available_externally i64 @_mm_add_pi16
+// CHECK: call <2 x i64> @vec_splats
+// CHECK: call <2 x i64> @vec_splats
+// CHECK: call <8 x i16> @vec_add(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally i64 @_mm_add_pi8
+// CHECK: call <2 x i64> @vec_splats
+// CHECK: call <2 x i64> @vec_splats
+// CHECK: call <16 x i8> @vec_add(signed char vector[16], signed char vector[16])
+
+// CHECK-LABEL: define available_externally i64 @_mm_adds_pu16
+// CHECK: call <2 x i64> @vec_splats
+// CHECK: call <2 x i64> @vec_splats
+// CHECK: call <8 x i16> @vec_adds(unsigned short vector[8], unsigned short vector[8])
+
+// CHECK-LABEL: define available_externally i64 @_mm_adds_pu8
+// CHECK: call <2 x i64> @vec_splats
+// CHECK: call <2 x i64> @vec_splats
+// CHECK: call <16 x i8> @vec_adds(unsigned char vector[16], unsigned char vector[16])
+
+// CHECK-LABEL: define available_externally i64 @_mm_adds_pi16
+// CHECK: call <2 x i64> @vec_splats
+// CHECK: call <2 x i64> @vec_splats
+// CHECK: call <8 x i16> @vec_adds(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally i64 @_mm_adds_pi8
+// CHECK: call <2 x i64> @vec_splats
+// CHECK: call <2 x i64> @vec_splats
+// CHECK: call <16 x i8> @vec_adds(signed char vector[16], signed char vector[16])
 
 void __attribute__((noinline))
 test_alt_name_add() {
@@ -149,33 +90,26 @@ test_alt_name_add() {
 
 // CHECK-LABEL: @test_alt_name_add
 
-// CHECK: define available_externally i64 @_m_paddb
-// CHECK: [[REG71:[0-9a-zA-Z_%.]+]] = call i64 @_mm_add_pi8
-// CHECK-NEXT: ret i64 [[REG71]]
+// CHECK-LABEL: define available_externally i64 @_m_paddb
+// CHECK: call i64 @_mm_add_pi8
 
-// CHECK: define available_externally i64 @_m_paddd
-// CHECK: [[REG72:[0-9a-zA-Z_%.]+]] = call i64 @_mm_add_pi32
-// CHECK-NEXT: ret i64 [[REG72]]
+// CHECK-LABEL: define available_externally i64 @_m_paddd
+// CHECK: call i64 @_mm_add_pi32
 
-// CHECK: define available_externally i64 @_m_paddsb
-// CHECK: [[REG73:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pi8
-// CHECK-NEXT: ret i64 [[REG73]]
+// CHECK-LABEL: define available_externally i64 @_m_paddsb
+// CHECK: call i64 @_mm_adds_pi8
 
-// CHECK: define available_externally i64 @_m_paddsw
-// CHECK: [[REG74:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pi16
-// CHECK-NEXT: ret i64 [[REG74]]
+// CHECK-LABEL: define available_externally i64 @_m_paddsw
+// CHECK: call i64 @_mm_adds_pi16
 
-// CHECK: define available_externally i64 @_m_paddusb
-// CHECK: [[REG75:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pu8
-// CHECK-NEXT: ret i64 [[REG75]]
+// CHECK-LABEL: define available_externally i64 @_m_paddusb
+// CHECK: call i64 @_mm_adds_pu8
 
-// CHECK: define available_externally i64 @_m_paddusw
-// CHECK: [[REG76:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pu16
-// CHECK-NEXT: ret i64 [[REG76]]
+// CHECK-LABEL: define available_externally i64 @_m_paddusw
+// CHECK: call i64 @_mm_adds_pu16
 
-// CHECK: define available_externally i64 @_m_paddw
-// CHECK: [[REG77:[0-9a-zA-Z_%.]+]] = call i64 @_mm_add_pi16
-// CHECK-NEXT: ret i64 [[REG77]]
+// CHECK-LABEL: define available_externally i64 @_m_paddw
+// CHECK: call i64 @_mm_add_pi16
 
 void __attribute__((noinline))
 test_cmp() {
@@ -189,84 +123,41 @@ test_cmp() {
 
 // CHECK-LABEL: @test_cmp
 
-// CHECK: define available_externally i64 @_mm_cmpeq_pi32
-
-// CHECK-P9: [[REG78:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-P9-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG78]] to <4 x i32>
-// CHECK-P9-NEXT: store <4 x i32> [[REG79]], <4 x i32>* [[REG80:[0-9a-zA-Z_%.]+]]
-// CHECK-P9: [[REG81:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-P9-NEXT: [[REG82:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG81]] to <4 x i32>
-// CHECK-P9-NEXT: store <4 x i32> [[REG82]], <4 x i32>* [[REG83:[0-9a-zA-Z_%.]+]]
-// CHECK-P9-NEXT: [[REG84:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG80]]
-// CHECK-P9-NEXT: [[REG85:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG83]]
-// CHECK-P9-NEXT: call <4 x i32> @vec_cmpeq(int vector[4], int vector[4])(<4 x i32> noundef [[REG84]], <4 x i32> noundef [[REG85]])
-
-// CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
-// CHECK-P8: [[REG86:[0-9a-zA-Z_%.]+]] = icmp eq i32 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-P8: [[REG87:[0-9a-zA-Z_%.]+]] = select i1 [[REG86]], i32 -1, i32 0
-// CHECK-P8: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
-// CHECK-P8-NEXT: store i32 [[REG87]], i32* {{[0-9a-zA-Z_%.]+}}
-// CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
-// CHECK-P8: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}
-
-// CHECK: define available_externally i64 @_mm_cmpeq_pi16
-// CHECK: [[REG88:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: [[REG89:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG88]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG89]], <8 x i16>* [[REG90:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG91:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: [[REG92:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG91]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG92]], <8 x i16>* [[REG93:[0-9a-zA-Z_%.]+]]
-// CHECK-NEXT: [[REG94:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG90]], align 16
-// CHECK-NEXT: [[REG95:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG93]], align 16
-// CHECK-NEXT: call <8 x i16> @vec_cmpeq(short vector[8], short vector[8])(<8 x i16> noundef [[REG94]], <8 x i16> noundef [[REG95]])
-
-// CHECK: define available_externally i64 @_mm_cmpeq_pi8
+// CHECK-LABEL: define available_externally i64 @_mm_cmpeq_pi32
+// CHECK-P9: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9: call <4 x i32> @vec_cmpeq(int vector[4], int vector[4])
+// CHECK-P8: %[[CMP1:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
+// CHECK-P8: select i1 %[[CMP1]], i32 -1, i32 0
+// CHECK-P8: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
+// CHECK-P8: select i1 %[[CMP2]], i32 -1, i32 0
+
+// CHECK-LABEL: define available_externally i64 @_mm_cmpeq_pi16
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <8 x i16> @vec_cmpeq(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally i64 @_mm_cmpeq_pi8
 // CHECK: call i64 asm "cmpb $0,$1,$2;\0A", "=r,r,r"
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* [[REG96:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG97:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG96]], align 8
-// CHECK-NEXT: ret i64 [[REG97]]
-
-// CHECK: define available_externally i64 @_mm_cmpgt_pi32
-
-// CHECK-P9: [[REG98:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-P9-NEXT: [[REG99:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG98]] to <4 x i32>
-// CHECK-P9-NEXT: store <4 x i32> [[REG99]], <4 x i32>* [[REG100:[0-9a-zA-Z_%.]+]]
-// CHECK-P9: [[REG101:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-P9-NEXT: [[REG102:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG101]] to <4 x i32>
-// CHECK-P9-NEXT: store <4 x i32> [[REG102]], <4 x i32>* [[REG103:[0-9a-zA-Z_%.]+]]
-// CHECK-P9-NEXT: [[REG104:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG100]]
-// CHECK-P9-NEXT: [[REG105:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG103]]
-// CHECK-P9-NEXT: call <4 x i32> @vec_cmpgt(int vector[4], int vector[4])(<4 x i32> noundef [[REG104]], <4 x i32> noundef [[REG85]])
-
-// CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
-// CHECK-P8: [[REG106:[0-9a-zA-Z_%.]+]] = icmp sgt i32 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-P8: [[REG107:[0-9a-zA-Z_%.]+]] = select i1 [[REG106]], i32 -1, i32 0
-// CHECK-P8: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
-// CHECK-P8-NEXT: store i32 [[REG107]], i32* {{[0-9a-zA-Z_%.]+}}
-// CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
-// CHECK-P8: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}
-
-// CHECK: define available_externally i64 @_mm_cmpgt_pi16
-// CHECK: [[REG108:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: [[REG109:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG108]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG109]], <8 x i16>* [[REG110:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG111:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: [[REG112:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG111]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG112]], <8 x i16>* [[REG113:[0-9a-zA-Z_%.]+]]
-// CHECK-NEXT: [[REG114:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG110]]
-// CHECK-NEXT: [[REG115:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG113]]
-// CHECK-NEXT: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])(<8 x i16> noundef [[REG114]], <8 x i16> noundef [[REG115]])
-
-// CHECK: define available_externally i64 @_mm_cmpgt_pi8
-// CHECK: [[REG116:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: [[REG117:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG116]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG117]], <16 x i8>* [[REG118:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG119:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: [[REG120:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG119]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG120]], <16 x i8>* [[REG121:[0-9a-zA-Z_%.]+]]
-// CHECK-NEXT: [[REG122:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG118]]
-// CHECK-NEXT: [[REG123:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG121]]
-// CHECK-NEXT: call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16])(<16 x i8> noundef [[REG122]], <16 x i8> noundef [[REG123]])
+
+// CHECK-LABEL: define available_externally i64 @_mm_cmpgt_pi32
+// CHECK-P9: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9: call <4 x i32> @vec_cmpgt(int vector[4], int vector[4])
+// CHECK-P8: %[[CMP1:[0-9a-zA-Z_.]+]] = icmp sgt i32 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
+// CHECK-P8: select i1 %[[CMP1]], i32 -1, i32 0
+// CHECK-P8: [[CMP2:[0-9a-zA-Z_.]+]] = icmp sgt i32 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
+// CHECK-P8: select i1 %[[CMP2]], i32 -1, i32 0
+
+// CHECK-LABEL: define available_externally i64 @_mm_cmpgt_pi16
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally i64 @_mm_cmpgt_pi8
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16])
 
 void __attribute__((noinline))
 test_alt_name_cmp() {
@@ -280,29 +171,23 @@ test_alt_name_cmp() {
 
 // CHECK-LABEL: @test_alt_name_cmp
 
-// CHECK: define available_externally i64 @_m_pcmpeqb
-// CHECK: [[REG124:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi8
-// CHECK-NEXT: ret i64 [[REG124]]
+// CHECK-LABEL: define available_externally i64 @_m_pcmpeqb
+// CHECK: call i64 @_mm_cmpeq_pi8
 
-// CHECK: define available_externally i64 @_m_pcmpeqd
-// CHECK: [[REG125:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi32
-// CHECK-NEXT: ret i64 [[REG125]]
+// CHECK-LABEL: define available_externally i64 @_m_pcmpeqd
+// CHECK: call i64 @_mm_cmpeq_pi32
 
-// CHECK: define available_externally i64 @_m_pcmpeqw
-// CHECK: [[REG126:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi16
-// CHECK-NEXT: ret i64 [[REG126]]
+// CHECK-LABEL: define available_externally i64 @_m_pcmpeqw
+// CHECK: call i64 @_mm_cmpeq_pi16
 
-// CHECK: define available_externally i64 @_m_pcmpgtb
-// CHECK: [[REG127:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpgt_pi8
-// CHECK-NEXT: ret i64 [[REG127]]
+// CHECK-LABEL: define available_externally i64 @_m_pcmpgtb
+// CHECK: call i64 @_mm_cmpgt_pi8
 
-// CHECK: define available_externally i64 @_m_pcmpgtd
-// CHECK: [[REG128:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpgt_pi32
-// CHECK-NEXT: ret i64 [[REG128]]
+// CHECK-LABEL: define available_externally i64 @_m_pcmpgtd
+// CHECK: call i64 @_mm_cmpgt_pi32
 
-// CHECK: define available_externally i64 @_m_pcmpgtw
-// CHECK: [[REG129:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpgt_pi16
-// CHECK-NEXT: ret i64 [[REG129]]
+// CHECK-LABEL: define available_externally i64 @_m_pcmpgtw
+// CHECK: call i64 @_mm_cmpgt_pi16
 
 void __attribute__((noinline))
 test_convert() {
@@ -314,23 +199,23 @@ test_convert() {
 
 // CHECK-LABEL: @test_convert
 
-// CHECK: define available_externally i64 @_mm_cvtm64_si64
-// CHECK: [[REG130:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: ret i64 [[REG130]]
+// CHECK-LABEL: define available_externally i64 @_mm_cvtm64_si64
+// CHECK: %[[RESULT:[0-9a-zA-Z_.]+]] = load i64, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK-NEXT: ret i64 %[[RESULT]]
 
-// CHECK: define available_externally i64 @_mm_cvtsi32_si64
-// CHECK: [[REG131:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG132:[0-9a-zA-Z_%.]+]] = zext i32 [[REG131]] to i64
-// CHECK-NEXT: ret i64 [[REG132]]
+// CHECK-LABEL: define available_externally i64 @_mm_cvtsi32_si64
+// CHECK: %[[LOAD:[0-9a-zA-Z_.]+]] = load i32, i32* %{{[0-9a-zA-Z_.]+}}
+// CHECK-NEXT: %[[RESULT:[0-9a-zA-Z_.]+]] = zext i32 %[[LOAD]] to i64
+// CHECK-NEXT: ret i64 %[[RESULT]]
 
-// CHECK: define available_externally i64 @_mm_cvtsi64_m64
-// CHECK: [[REG133:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: ret i64 [[REG133]]
+// CHECK-LABEL: define available_externally i64 @_mm_cvtsi64_m64
+// CHECK: %[[RESULT:[0-9a-zA-Z_.]+]] = load i64, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK-NEXT: ret i64 %[[RESULT]]
 
-// CHECK: define available_externally signext i32 @_mm_cvtsi64_si32
-// CHECK: [[REG134:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG135:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG134]] to i32
-// CHECK-NEXT: ret i32 [[REG135]]
+// CHECK-LABEL: define available_externally signext i32 @_mm_cvtsi64_si32
+// CHECK: %[[LOAD:[0-9a-zA-Z_.]+]] = load i64, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK-NEXT: %[[RESULT:[0-9a-zA-Z_.]+]] = trunc i64 %[[LOAD]] to i32
+// CHECK-NEXT: ret i32 %[[RESULT]]
 
 void __attribute__((noinline))
 test_alt_name_convert() {
@@ -342,21 +227,19 @@ test_alt_name_convert() {
 
 // CHECK-LABEL: @test_alt_name_convert
 
-// CHECK: define available_externally i64 @_m_from_int
-// CHECK: [[REG136:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvtsi32_si64
-// CHECK-NEXT: ret i64 [[REG136]]
+// CHECK-LABEL: define available_externally i64 @_m_from_int
+// CHECK: call i64 @_mm_cvtsi32_si64
 
-// CHECK: define available_externally i64 @_m_from_int64
-// CHECK: [[REG137:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: ret i64 [[REG137]]
+// CHECK-LABEL: define available_externally i64 @_m_from_int64
+// CHECK: %[[RESULT:[0-9a-zA-Z_.]+]] = load i64, i64* %{{[0-9a-zA-Z_.]+}}
+// CHECK-NEXT: ret i64 %[[RESULT]]
 
-// CHECK: define available_externally signext i32 @_m_to_int
-// CHECK: [[REG138:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvtsi64_si32
-// CHECK-NEXT: ret i32 [[REG138]]
+// CHECK-LABEL: define available_externally signext i32 @_m_to_int
+// CHECK: call signext i32 @_mm_cvtsi64_si32
 
-// CHECK: define available_externally i64 @_m_to_int64
-// CHECK: [[REG139:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: ret i64 [[REG139]]
+// CHECK-LABEL: define available_externally i64 @_m_to_int64
+// CHECK: %[[RESULT:[0-9a-zA-Z_.]+]] = load i64, i64* %{{[0-9a-zA-Z_.]+}}
+// CHECK-NEXT: ret i64 %[[RESULT]]
 
 void __attribute__((noinline))
 test_empty() {
@@ -366,11 +249,11 @@ test_empty() {
 
 // CHECK-LABEL: @test_empty
 
-// CHECK: define available_externally void @_mm_empty
+// CHECK-LABEL: define available_externally void @_mm_empty
 // CHECK-NEXT: entry
 // CHECK-NEXT: ret void
 
-// CHECK: define available_externally void @_m_empty
+// CHECK-LABEL: define available_externally void @_m_empty
 // CHECK-NEXT: entry
 // CHECK-NEXT: ret void
 
@@ -384,22 +267,18 @@ test_logic() {
 
 // CHECK-LABEL: @test_logic
 
-// CHECK: define available_externally i64 @_mm_and_si64
-// CHECK: [[REG140:[0-9a-zA-Z_%.]+]] = and i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: ret i64 [[REG140]]
+// CHECK-LABEL: define available_externally i64 @_mm_and_si64
+// CHECK: and i64 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
 
-// CHECK: define available_externally i64 @_mm_andnot_si64
-// CHECK: [[REG141:[0-9a-zA-Z_%.]+]] = xor i64 {{[0-9a-zA-Z_%.]+}}, -1
-// CHECK: [[REG142:[0-9a-zA-Z_%.]+]] = and i64 [[REG141]], {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: ret i64 [[REG142]]
+// CHECK-LABEL: define available_externally i64 @_mm_andnot_si64
+// CHECK: %[[XOR:[0-9a-zA-Z_.]+]] = xor i64 %{{[0-9a-zA-Z_.]+}}, -1
+// CHECK: and i64 %[[XOR]], %{{[0-9a-zA-Z_.]+}}
 
-// CHECK: define available_externally i64 @_mm_or_si64
-// CHECK: [[REG143:[0-9a-zA-Z_%.]+]] = or i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: ret i64 [[REG143]]
+// CHECK-LABEL: define available_externally i64 @_mm_or_si64
+// CHECK: or i64 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
 
-// CHECK: define available_externally i64 @_mm_xor_si64
-// CHECK: [[REG144:[0-9a-zA-Z_%.]+]] = xor i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: ret i64 [[REG144]]
+// CHECK-LABEL: define available_externally i64 @_mm_xor_si64
+// CHECK: xor i64 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
 
 void __attribute__((noinline))
 test_alt_name_logic() {
@@ -411,21 +290,17 @@ test_alt_name_logic() {
 
 // CHECK-LABEL: @test_alt_name_logic
 
-// CHECK: define available_externally i64 @_m_pand
-// CHECK: [[REG145:[0-9a-zA-Z_%.]+]] = call i64 @_mm_and_si64
-// CHECK-NEXT: ret i64 [[REG145]]
+// CHECK-LABEL: define available_externally i64 @_m_pand
+// CHECK: call i64 @_mm_and_si64
 
-// CHECK: define available_externally i64 @_m_pandn
-// CHECK: [[REG146:[0-9a-zA-Z_%.]+]] = call i64 @_mm_andnot_si64
-// CHECK-NEXT: ret i64 [[REG146]]
+// CHECK-LABEL: define available_externally i64 @_m_pandn
+// CHECK: call i64 @_mm_andnot_si64
 
-// CHECK: define available_externally i64 @_m_por
-// CHECK: [[REG147:[0-9a-zA-Z_%.]+]] = call i64 @_mm_or_si64
-// CHECK-NEXT: ret i64 [[REG147]]
+// CHECK-LABEL: define available_externally i64 @_m_por
+// CHECK: call i64 @_mm_or_si64
 
-// CHECK: define available_externally i64 @_m_pxor
-// CHECK: [[REG148:[0-9a-zA-Z_%.]+]] = call i64 @_mm_xor_si64
-// CHECK-NEXT: ret i64 [[REG148]]
+// CHECK-LABEL: define available_externally i64 @_m_pxor
+// CHECK: call i64 @_mm_xor_si64
 
 void __attribute__((noinline))
 test_madd() {
@@ -435,23 +310,13 @@ test_madd() {
 
 // CHECK-LABEL: @test_madd
 
-// CHECK: define available_externally i64 @_mm_madd_pi16
-// CHECK: store <4 x i32> zeroinitializer, <4 x i32>* [[REG149:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG150:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG151:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG150]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG151]], <8 x i16>* [[REG152:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG153:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG154:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG153]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG154]], <8 x i16>* [[REG155:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG156:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG152]], align 16
-// CHECK-NEXT: [[REG157:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG155]], align 16
-// CHECK-NEXT: [[REG158:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG149]], align 16
-// CHECK-NEXT: [[REG159:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmsumshm(<8 x i16> noundef [[REG156]], <8 x i16> noundef [[REG157]], <4 x i32> noundef [[REG158]])
-// CHECK-NEXT: store <4 x i32> [[REG159]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-
-// CHECK: define available_externally i64 @_m_pmaddwd
-// CHECK: [[REG160:[0-9a-zA-Z_%.]+]] = call i64 @_mm_madd_pi16
-// CHECK-NEXT: ret i64 [[REG160]]
+// CHECK-LABEL: define available_externally i64 @_mm_madd_pi16
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <4 x i32> @vec_vmsumshm
+
+// CHECK-LABEL: define available_externally i64 @_m_pmaddwd
+// CHECK: call i64 @_mm_madd_pi16
 
 void __attribute__((noinline))
 test_mul() {
@@ -461,40 +326,19 @@ test_mul() {
 
 // CHECK-LABEL: @test_mul
 
-// CHECK: define available_externally i64 @_mm_mulhi_pi16
-// CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK: [[REG161:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK: store <8 x i16> {{[0-9a-zA-Z_%.]+}}, <8 x i16>* [[REG162:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG163:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG164:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: [[REG165:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG164]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG165]], <8 x i16>* [[REG166:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG167:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG162]], align 16
-// CHECK-NEXT: [[REG168:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG166]], align 16
-// CHECK-NEXT: [[REG169:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulesh(<8 x i16> noundef [[REG167]], <8 x i16> noundef [[REG168]])
-// CHECK-NEXT: store <4 x i32> [[REG169]], <4 x i32>* [[REG170:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG171:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG162]], align 16
-// CHECK-NEXT: [[REG172:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG166]], align 16
-// CHECK-NEXT: [[REG173:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulosh(<8 x i16> noundef [[REG171]], <8 x i16> noundef [[REG172]])
-// CHECK-NEXT: store <4 x i32> [[REG173]], <4 x i32>* [[REG174:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG175:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG170]], align 16
-// CHECK-NEXT: [[REG176:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG174]], align 16
-// CHECK-NEXT: [[REG177:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef [[REG175]], <4 x i32> noundef [[REG176]], <16 x i8> noundef [[REG177]])
-
-// CHECK: define available_externally i64 @_mm_mullo_pi16
-// CHECK: [[REG178:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: [[REG179:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG178]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG179]], <8 x i16>* [[REG180:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG181:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG182:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: [[REG183:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG182]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG183]], <8 x i16>* [[REG184:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG185:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG180]], align 16
-// CHECK-NEXT: [[REG186:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG184]], align 16
-// CHECK-NEXT: [[REG187:[0-9a-zA-Z_%.]+]] = mul <8 x i16> [[REG185]], [[REG186]]
-// CHECK-NEXT: store <8 x i16> [[REG187]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LABEL: define available_externally i64 @_mm_mulhi_pi16
+// CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>
+// CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <4 x i32> @vec_vmulesh
+// CHECK: call <4 x i32> @vec_vmulosh
+// CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])
+
+// CHECK-LABEL: define available_externally i64 @_mm_mullo_pi16
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: mul <8 x i16> %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
 
 void __attribute__((noinline))
 test_alt_name_mul() {
@@ -504,13 +348,11 @@ test_alt_name_mul() {
 
 // CHECK-LABEL: @test_alt_name_mul
 
-// CHECK: define available_externally i64 @_m_pmulhw
-// CHECK: [[REG188:[0-9a-zA-Z_%.]+]] = call i64 @_mm_mulhi_pi16
-// CHECK-NEXT: ret i64 [[REG188]]
+// CHECK-LABEL: define available_externally i64 @_m_pmulhw
+// CHECK: call i64 @_mm_mulhi_pi16
 
-// CHECK: define available_externally i64 @_m_pmullw
-// CHECK: [[REG189:[0-9a-zA-Z_%.]+]] = call i64 @_mm_mullo_pi16
-// CHECK-NEXT: ret i64 [[REG189]]
+// CHECK-LABEL: define available_externally i64 @_m_pmullw
+// CHECK: call i64 @_mm_mullo_pi16
 
 void __attribute__((noinline))
 test_packs() {
@@ -521,45 +363,17 @@ test_packs() {
 
 // CHECK-LABEL: @test_packs
 
-// CHECK: define available_externally i64 @_mm_packs_pu16(i64 noundef [[REG190:[0-9a-zA-Z_%.]+]], i64 noundef [[REG191:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG190]], i64* [[REG192:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG191]], i64* [[REG193:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-LE: load i64, i64* [[REG192]], align 8
-// CHECK: load i64, i64* [[REG193]], align 8
-// CHECK-BE: load i64, i64* [[REG192]], align 8
-// CHECK: [[REG194:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmplt
-// CHECK-NEXT: store <8 x i16> [[REG194]], <8 x i16>* [[REG195:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG196:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG197:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG198:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG197]], align 16
-// CHECK-NEXT: [[REG199:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_packs(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG196]], <8 x i16> noundef [[REG198]])
-// CHECK-NEXT: store <16 x i8> [[REG199]], <16 x i8>* [[REG200:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG201:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG195]], align 16
-// CHECK-NEXT: [[REG202:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG195]], align 16
-// CHECK-NEXT: [[REG203:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_pack(bool vector[8], bool vector[8])(<8 x i16> noundef [[REG201]], <8 x i16> noundef [[REG202]])
-// CHECK-NEXT: store <16 x i8> [[REG203]], <16 x i8>* [[REG204:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG205:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG200]], align 16
-// CHECK-NEXT: [[REG206:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG204]], align 16
-// CHECK-NEXT: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> noundef [[REG205]], <16 x i8> noundef zeroinitializer, <16 x i8> noundef [[REG206]])
-
-// CHECK: define available_externally i64 @_mm_packs_pi16(i64 noundef [[REG207:[0-9a-zA-Z_%.]+]], i64 noundef [[REG208:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG207]], i64* [[REG209:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG208]], i64* [[REG210:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-LE: load i64, i64* [[REG209]], align 8
-// CHECK: load i64, i64* [[REG210]], align 8
-// CHECK-BE: load i64, i64* [[REG209]], align 8
-// CHECK: [[REG211:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG212:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG213:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG212]], align 16
-// CHECK-NEXT: call <16 x i8> @vec_packs(short vector[8], short vector[8])(<8 x i16> noundef [[REG211]], <8 x i16> noundef [[REG213]])
-
-// CHECK: define available_externally i64 @_mm_packs_pi32(i64 noundef [[REG214:[0-9a-zA-Z_%.]+]], i64 noundef [[REG215:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG214]], i64* [[REG216:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG215]], i64* [[REG217:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-LE: load i64, i64* [[REG216]], align 8
-// CHECK: load i64, i64* [[REG217]], align 8
-// CHECK-BE: load i64, i64* [[REG216]], align 8
-// CHECK: [[REG218:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG219:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG220:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG219]], align 16
-// CHECK-NEXT: call <8 x i16> @vec_packs(int vector[4], int vector[4])(<4 x i32> noundef [[REG218]], <4 x i32> noundef [[REG220]])
+// CHECK-LABEL: define available_externally i64 @_mm_packs_pu16
+// CHECK: call <8 x i16> @vec_cmplt
+// CHECK: call <16 x i8> @vec_packs(unsigned short vector[8], unsigned short vector[8])
+// CHECK: call <16 x i8> @vec_pack(bool vector[8], bool vector[8])
+// CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally i64 @_mm_packs_pi16
+// CHECK: call <16 x i8> @vec_packs(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally i64 @_mm_packs_pi32
+// CHECK: call <8 x i16> @vec_packs(int vector[4], int vector[4])
 
 void __attribute__((noinline))
 test_alt_name_packs() {
@@ -570,17 +384,14 @@ test_alt_name_packs() {
 
 // CHECK-LABEL: @test_alt_name_packs
 
-// CHECK: define available_externally i64 @_m_packssdw
-// CHECK: [[REG221:[0-9a-zA-Z_%.]+]] = call i64 @_mm_packs_pi32
-// CHECK-NEXT: ret i64 [[REG221]]
+// CHECK-LABEL: define available_externally i64 @_m_packssdw
+// CHECK: call i64 @_mm_packs_pi32
 
-// CHECK: define available_externally i64 @_m_packsswb
-// CHECK: [[REG222:[0-9a-zA-Z_%.]+]] = call i64 @_mm_packs_pi16
-// CHECK-NEXT: ret i64 [[REG222]]
+// CHECK-LABEL: define available_externally i64 @_m_packsswb
+// CHECK: call i64 @_mm_packs_pi16
 
-// CHECK: define available_externally i64 @_m_packuswb
-// CHECK: [[REG223:[0-9a-zA-Z_%.]+]] = call i64 @_mm_packs_pu16
-// CHECK-NEXT: ret i64 [[REG223]]
+// CHECK-LABEL: define available_externally i64 @_m_packuswb
+// CHECK: call i64 @_mm_packs_pu16
 
 void __attribute__((noinline))
 test_set() {
@@ -591,39 +402,14 @@ test_set() {
 
 // CHECK-LABEL: @test_set
 
-// CHECK: define available_externally i64 @_mm_set_pi32
-// CHECK: [[REG224:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG224]]
-// CHECK: [[REG225:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG225]]
-
-// CHECK: define available_externally i64 @_mm_set_pi16
-// CHECK: [[REG226:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG226]]
-// CHECK: [[REG227:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG227]]
-// CHECK: [[REG228:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
-// CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG228]]
-// CHECK: [[REG229:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
-// CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG229]]
-
-// CHECK: define available_externally i64 @_mm_set_pi8
-// CHECK: [[REG230:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG230]]
-// CHECK: [[REG231:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG231]]
-// CHECK: [[REG232:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
-// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG232]]
-// CHECK: [[REG233:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
-// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG233]]
-// CHECK: [[REG234:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 4
-// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG234]]
-// CHECK: [[REG235:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 5
-// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG235]]
-// CHECK: [[REG236:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 6
-// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG236]]
-// CHECK: [[REG237:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 7
-// CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG237]]
+// CHECK-LABEL: define available_externally i64 @_mm_set_pi32
+// CHECK-COUNT-2: store i32 %{{[0-9a-zA-Z_.]+}}, i32*
+
+// CHECK-LABEL: define available_externally i64 @_mm_set_pi16
+// CHECK-COUNT-4: store i16 %{{[0-9a-zA-Z_.]+}}, i16*
+
+// CHECK-LABEL: define available_externally i64 @_mm_set_pi8
+// CHECK-COUNT-8: store i8 %{{[0-9a-zA-Z_.]+}}, i8*
 
 void __attribute__((noinline))
 test_set1() {
@@ -634,43 +420,26 @@ test_set1() {
 
 // CHECK-LABEL: @test_set1
 
-// CHECK: define available_externally i64 @_mm_set1_pi32
-// CHECK: [[REG244:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG244]], align 8
-// CHECK-NEXT: [[REG245:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK: [[REG246:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG246]], align 4
-
-// CHECK: define available_externally i64 @_mm_set1_pi16
-
-// CHECK-P9: [[REG247:[0-9a-zA-Z_%.]+]] = load i16, i16* {{[0-9a-zA-Z_%.]+}}, align 2
-// CHECK-P9-NEXT: [[REG248:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(short)(i16 noundef signext [[REG247]])
-// CHECK-P9-NEXT: store <8 x i16> %call, <8 x i16>* [[REG249:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-P9-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG249:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-P9-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG250]] to <2 x i64>
-// CHECK-P9-NEXT: [[REG252:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG251]], i32 0
-// CHECK-P9-NEXT: ret i64 [[REG252]]
-
-// CHECK-P8: [[REG253:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254:[0-9a-zA-Z_%.]+]], align 2
-// CHECK-P8: [[REG255:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-P8-NEXT: store i16 [[REG253]], i16* [[REG255]], align 8
-// CHECK-P8-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254]], align 2
-// CHECK-P8: [[REG256:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-P8-NEXT: store i16 [[REG250]], i16* [[REG256]], align 2
-// CHECK-P8-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254]], align 2
-// CHECK-P8: [[REG257:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
-// CHECK-P8-NEXT: store i16 [[REG251]], i16* [[REG257]], align 4
-// CHECK-P8-NEXT: [[REG258:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254]], align 2
-// CHECK-P8: [[REG259:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
-// CHECK-P8-NEXT: store i16 [[REG258]], i16* [[REG259]], align 2
-
-// CHECK: define available_externally i64 @_mm_set1_pi8
-// CHECK: [[REG260:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_splats(signed char)(i8 noundef signext {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: store <16 x i8> [[REG260]], <16 x i8>* [[REG261:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG262:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG261:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG263:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> %1 to <2 x i64>
-// CHECK-NEXT: [[REG264:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG263]], i32 0
-// CHECK-NEXT: ret i64 [[REG264]]
+// CHECK-LABEL: define available_externally i64 @_mm_set1_pi32
+// CHECK-COUNT-2: store i32 %{{[0-9a-zA-Z_.]+}}, i32*
+
+// CHECK-LABEL: define available_externally i64 @_mm_set1_pi16
+// CHECK-P9: call <8 x i16> @vec_splats(short)
+// CHECK-P9: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK-P8: %[[ADDR1:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
+// CHECK-P8: store i16 %{{[0-9a-zA-Z_.]+}}, i16* %[[ADDR1]], align 8
+// CHECK-P8: %[[ADDR2:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
+// CHECK-P8: store i16 %{{[0-9a-zA-Z_.]+}}, i16* %[[ADDR2]], align 2
+// CHECK-P8: %[[ADDR3:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
+// CHECK-P8: store i16 %{{[0-9a-zA-Z_.]+}}, i16* %[[ADDR3]], align 4
+// CHECK-P8: %[[ADDR4:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
+// CHECK-P8: store i16 %{{[0-9a-zA-Z_.]+}}, i16* %[[ADDR4]], align 2
+
+
+// CHECK-LABEL: define available_externally i64 @_mm_set1_pi8
+// CHECK: call <16 x i8> @vec_splats(signed char)
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0
 
 void __attribute__((noinline))
 test_setr() {
@@ -681,21 +450,17 @@ test_setr() {
 
 // CHECK-LABEL: @test_setr
 
-// CHECK: define available_externally i64 @_mm_setr_pi32
-// CHECK: [[REG265:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK: [[REG266:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: store i32 [[REG265:[0-9a-zA-Z_%.]+]], i32* [[REG266]], align 8
-// CHECK-NEXT: [[REG267:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK: [[REG268:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: store i32 [[REG267]], i32* [[REG268]], align 4
+// CHECK-LABEL: define available_externally i64 @_mm_setr_pi32
+// CHECK: %[[ADDR1:[0-9a-zA-Z_.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
+// CHECK: store i32 %{{[0-9a-zA-Z_.]+}}, i32* %[[ADDR1]], align 8
+// CHECK: %[[ADDR2:[0-9a-zA-Z_.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
+// CHECK: store i32 %{{[0-9a-zA-Z_.]+}}, i32* %[[ADDR2]], align 4
 
-// CHECK: define available_externally i64 @_mm_setr_pi16
-// CHECK: [[REG269:[0-9a-zA-Z_%.]+]] = call i64 @_mm_set_pi16
-// CHECK-NEXT: ret i64 [[REG269]]
+// CHECK-LABEL: define available_externally i64 @_mm_setr_pi16
+// CHECK: call i64 @_mm_set_pi16
 
-// CHECK: define available_externally i64 @_mm_setr_pi8
-// CHECK: [[REG270:[0-9a-zA-Z_%.]+]] = call i64 @_mm_set_pi8
-// CHECK-NEXT: ret i64 [[REG270]]
+// CHECK-LABEL: define available_externally i64 @_mm_setr_pi8
+// CHECK: call i64 @_mm_set_pi8
 
 void __attribute__((noinline))
 test_setzero() {
@@ -704,7 +469,7 @@ test_setzero() {
 
 // CHECK-LABEL: @test_setzero
 
-// CHECK: define available_externally i64 @_mm_setzero_si64
+// CHECK-LABEL: define available_externally i64 @_mm_setzero_si64
 // CHECK: entry
 // CHECK-NEXT: ret i64 0
 
@@ -720,56 +485,35 @@ test_sll() {
 
 // CHECK-LABEL: @test_sll
 
-// CHECK: define available_externally i64 @_mm_sll_pi16
-// CHECK: [[REG271:[0-9a-zA-Z_%.]+]] = icmp ule i64 {{[0-9a-zA-Z_%.]+}}, 15
-// CHECK-NEXT: br i1 [[REG271]], label %[[REG272:[0-9a-zA-Z_.]+]], label %[[REG273:[0-9a-zA-Z_.]+]]
-// CHECK: [[REG272]]
+// CHECK-LABEL: define available_externally i64 @_mm_sll_pi16
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp ule i64 %{{[0-9a-zA-Z_.]+}}, 15
+// CHECK-NEXT: br i1 %[[CMP]]
 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
-// CHECK: trunc i64 {{[0-9a-zA-Z_%.]+}} to i16
-// CHECK-NEXT: call <8 x i16> @vec_splats(unsigned short)
+// CHECK: trunc i64 %{{[0-9a-zA-Z_.]+}} to i16
+// CHECK: call <8 x i16> @vec_splats(unsigned short)
 // CHECK: call <8 x i16> @vec_sl(short vector[8], unsigned short vector[8])
-// CHECK: store i64 [[REG274:[0-9a-zA-Z_%.]+]], i64* [[REG275:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: br label %[[REG276:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG273]]
-// CHECK-NEXT: store i64 0, i64* [[REG275]], align 8
-// CHECK-NEXT: br label %[[REG276]]
-// CHECK: [[REG276]]
-// CHECK-NEXT: [[REG277:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG275]], align 8
-// CHECK-NEXT: ret i64 [[REG277]]
-
-// CHECK: define available_externally i64 @_mm_sll_pi32
-// CHECK: [[REG278:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: [[REG279:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG278]]
-// CHECK-NEXT: [[REG280:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: [[REG281:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG280]] to i32
-// CHECK-NEXT: [[REG282:[0-9a-zA-Z_%.]+]] = shl i32 [[REG279]], [[REG281]]
-// CHECK: [[REG283:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: store i32 [[REG282]], i32* [[REG283]]
-// CHECK: [[REG284:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: [[REG285:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG284]], align 4
-// CHECK: trunc i64 {{[0-9a-zA-Z_%.]+}} to i32
-// CHECK-NEXT: [[REG286:[0-9a-zA-Z_%.]+]] = shl i32 [[REG285]], {{[0-9a-zA-Z_%.]+}}
-// CHECK: [[REG287:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: store i32 [[REG286]], i32* [[REG287]], align 4
-
-// CHECK: define available_externally i64 @_mm_sll_si64
-// CHECK: [[REG288:[0-9a-zA-Z_%.]+]] = shl i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: ret i64 [[REG288]]
-
-// CHECK: define available_externally i64 @_mm_slli_pi16
-// CHECK: [[REG289:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
-// CHECK-NEXT: [[REG290:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi16(i64 noundef {{[0-9a-zA-Z_%.]+}}, i64 noundef [[REG289]])
-// CHECK-NEXT: ret i64 [[REG290]]
-
-// CHECK: define available_externally i64 @_mm_slli_pi32
-// CHECK: [[REG291:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
-// CHECK-NEXT: [[REG292:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi32(i64 noundef {{[0-9a-zA-Z_%.]+}}, i64 noundef [[REG291]])
-// CHECK-NEXT: ret i64 [[REG292]]
-
-// CHECK: define available_externally i64 @_mm_slli_si64
-// CHECK: [[REG293:[0-9a-zA-Z_%.]+]] = zext i32 {{[0-9a-zA-Z_%.]+}} to i64
-// CHECK-NEXT: [[REG294:[0-9a-zA-Z_%.]+]] = shl i64 {{[0-9a-zA-Z_%.]+}}, [[REG293]]
-// CHECK-NEXT: ret i64 [[REG294]]
+// CHECK: store i64 0, i64*
+
+// CHECK-LABEL: define available_externally i64 @_mm_sll_pi32
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i64 %{{[0-9a-zA-Z_.]+}} to i32
+// CHECK: shl i32 %{{[0-9a-zA-Z_.]+}}, %[[TRUNC]]
+// CHECK: trunc i64 %{{[0-9a-zA-Z_.]+}} to i32
+// CHECK: shl i32 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
+
+// CHECK-LABEL: define available_externally i64 @_mm_sll_si64
+// CHECK: shl i64 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
+
+// CHECK-LABEL: define available_externally i64 @_mm_slli_pi16
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: call i64 @_mm_sll_pi16(i64 noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %[[EXT]])
+
+// CHECK-LABEL: define available_externally i64 @_mm_slli_pi32
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: call i64 @_mm_sll_pi32(i64 noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %[[EXT]])
+
+// CHECK-LABEL: define available_externally i64 @_mm_slli_si64
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: shl i64 %{{[0-9a-zA-Z_.]+}}, %[[EXT]]
 
 void __attribute__((noinline))
 test_alt_name_sll() {
@@ -783,29 +527,23 @@ test_alt_name_sll() {
 
 // CHECK-LABEL: @test_alt_name_sll
 
-// CHECK: define available_externally i64 @_m_pslld
-// CHECK: [[REG295:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi32
-// CHECK-NEXT: ret i64 [[REG295]]
+// CHECK-LABEL: define available_externally i64 @_m_pslld
+// CHECK: call i64 @_mm_sll_pi32
 
-// CHECK: define available_externally i64 @_m_pslldi
-// CHECK: [[REG296:[0-9a-zA-Z_%.]+]] = call i64 @_mm_slli_pi32
-// CHECK-NEXT: ret i64 [[REG296]]
+// CHECK-LABEL: define available_externally i64 @_m_pslldi
+// CHECK: call i64 @_mm_slli_pi32
 
-// CHECK: define available_externally i64 @_m_psllq
-// CHECK: [[REG297:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_si64
-// CHECK-NEXT: ret i64 [[REG297]]
+// CHECK-LABEL: define available_externally i64 @_m_psllq
+// CHECK: call i64 @_mm_sll_si64
 
-// CHECK: define available_externally i64 @_m_psllqi
-// CHECK: [[REG298:[0-9a-zA-Z_%.]+]] = call i64 @_mm_slli_si64
-// CHECK-NEXT: ret i64 [[REG298]]
+// CHECK-LABEL: define available_externally i64 @_m_psllqi
+// CHECK: call i64 @_mm_slli_si64
 
-// CHECK: define available_externally i64 @_m_psllw
-// CHECK: [[REG299:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi16
-// CHECK-NEXT: ret i64 [[REG299]]
+// CHECK-LABEL: define available_externally i64 @_m_psllw
+// CHECK: call i64 @_mm_sll_pi16
 
-// CHECK: define available_externally i64 @_m_psllwi
-// CHECK: [[REG300:[0-9a-zA-Z_%.]+]] = call i64 @_mm_slli_pi16
-// CHECK-NEXT: ret i64 [[REG300]]
+// CHECK-LABEL: define available_externally i64 @_m_psllwi
+// CHECK: call i64 @_mm_slli_pi16
 
 void __attribute__((noinline))
 test_sra() {
@@ -817,54 +555,28 @@ test_sra() {
 
 // CHECK-LABEL: @test_sra
 
-// CHECK: define available_externally i64 @_mm_sra_pi32
-// CHECK: [[REG301:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: [[REG302:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG301]], align 8
-// CHECK-NEXT: [[REG303:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG304:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG303]] to i32
-// CHECK-NEXT: [[REG305:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG302]], [[REG304]]
-// CHECK: [[REG306:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: store i32 [[REG305]], i32* [[REG306]], align 8
-// CHECK: [[REG307:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: [[REG308:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG307]], align 4
-// CHECK-NEXT: [[REG309:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG310:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG309]] to i32
-// CHECK-NEXT: [[REG311:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG308]], [[REG310]]
-// CHECK: [[REG312:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: store i32 [[REG311]], i32* [[REG312]], align 4
-
-// CHECK: define available_externally i64 @_mm_sra_pi16
-// CHECK: [[REG313:[0-9a-zA-Z_%.]+]] = icmp ule i64 {{[0-9a-zA-Z_%.]+}}, 15
-// CHECK-NEXT: br i1 [[REG313]], label %[[REG314:[0-9a-zA-Z_%.]+]], label %[[REG315:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG314]]
-// CHECK: [[REG316:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG317:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG316]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG317]], <8 x i16>* [[REG318:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG319:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG320:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG319]] to i16
-// CHECK-NEXT: [[REG321:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext [[REG320]])
-// CHECK-NEXT: store <8 x i16> [[REG321]], <8 x i16>* [[REG322:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG323:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG318]], align 16
-// CHECK-NEXT: [[REG324:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG322]], align 16
-// CHECK-NEXT: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG323]], <8 x i16> noundef [[REG324]])
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* [[REG325:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: br label %[[REG326:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG315]]
-// CHECK-NEXT: store i64 0, i64* [[REG325]], align 8
-// CHECK-NEXT: br label %[[REG326]]
-// CHECK: [[REG326]]
-// CHECK-NEXT: [[REG327:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG325]], align 8
-// CHECK-NEXT: ret i64 [[REG327]]
-
-// CHECK: define available_externally i64 @_mm_srai_pi32
-// CHECK: [[REG328:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
-// CHECK-NEXT: [[REG329:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi32(i64 noundef {{[0-9a-zA-Z_%.]+}}, i64 noundef [[REG328]])
-// CHECK-NEXT: ret i64 [[REG329]]
-
-// CHECK: define available_externally i64 @_mm_srai_pi16
-// CHECK: [[REG330:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
-// CHECK-NEXT: [[REG331:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi16(i64 noundef {{[0-9a-zA-Z_%.]+}}, i64 noundef [[REG330]])
-// CHECK-NEXT: ret i64 [[REG331]]
+// CHECK-LABEL: define available_externally i64 @_mm_sra_pi32
+// CHECK: %[[TRUNC1:[0-9a-zA-Z_.]+]] = trunc i64 %{{[0-9a-zA-Z_.]+}} to i32
+// CHECK: ashr i32 %{{[0-9a-zA-Z_.]+}}, %[[TRUNC1]]
+// CHECK: %[[TRUNC2:[0-9a-zA-Z_.]+]] = trunc i64 %{{[0-9a-zA-Z_.]+}} to i32
+// CHECK: ashr i32 %{{[0-9a-zA-Z_.]+}}, %[[TRUNC2]]
+
+// CHECK-LABEL: define available_externally i64 @_mm_sra_pi16
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp ule i64 %{{[0-9a-zA-Z_.]+}}, 15
+// CHECK: br i1 %[[CMP]]
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i64 %{{[0-9a-zA-Z_.]+}} to i16
+// CHECK: call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext %[[TRUNC]])
+// CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])
+// CHECK: store i64 0, i64*
+
+// CHECK-LABEL: define available_externally i64 @_mm_srai_pi32
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: call i64 @_mm_sra_pi32(i64 noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %[[EXT]])
+
+// CHECK-LABEL: define available_externally i64 @_mm_srai_pi16
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: call i64 @_mm_sra_pi16(i64 noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %[[EXT]])
 
 void __attribute__((noinline))
 test_alt_name_sra() {
@@ -876,21 +588,17 @@ test_alt_name_sra() {
 
 // CHECK-LABEL: @test_alt_name_sra
 
-// CHECK: define available_externally i64 @_m_psrad
-// CHECK: [[REG332:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi32
-// CHECK-NEXT: ret i64 [[REG332]]
+// CHECK-LABEL: define available_externally i64 @_m_psrad
+// CHECK: call i64 @_mm_sra_pi32
 
-// CHECK: define available_externally i64 @_m_psraw
-// CHECK: [[REG333:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi16
-// CHECK-NEXT: ret i64 [[REG333]]
+// CHECK-LABEL: define available_externally i64 @_m_psraw
+// CHECK: call i64 @_mm_sra_pi16
 
-// CHECK: define available_externally i64 @_m_psradi
-// CHECK: [[REG334:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srai_pi32
-// CHECK-NEXT: ret i64 [[REG334]]
+// CHECK-LABEL: define available_externally i64 @_m_psradi
+// CHECK: call i64 @_mm_srai_pi32
 
-// CHECK: define available_externally i64 @_m_psrawi
-// CHECK: [[REG335:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srai_pi16
-// CHECK-NEXT: ret i64 [[REG335]]
+// CHECK-LABEL: define available_externally i64 @_m_psrawi
+// CHECK: call i64 @_mm_srai_pi16
 
 void __attribute__((noinline))
 test_srl() {
@@ -904,63 +612,35 @@ test_srl() {
 
 // CHECK-LABEL: @test_srl
 
-// CHECK: define available_externally i64 @_mm_srl_si64
-// CHECK: [[REG336:[0-9a-zA-Z_%.]+]] = lshr i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
-// CHECK-NEXT: ret i64 [[REG336]]
-
-// CHECK: define available_externally i64 @_mm_srl_pi32
-// CHECK: [[REG337:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: [[REG338:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG337]], align 8
-// CHECK-NEXT: [[REG339:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG340:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG339]] to i32
-// CHECK-NEXT: [[REG341:[0-9a-zA-Z_%.]+]] = lshr i32 [[REG338]], [[REG340]]
-// CHECK: [[REG342:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: store i32 [[REG341]], i32* [[REG342]], align 8
-// CHECK: [[REG343:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: [[REG344:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG343]], align 4
-// CHECK-NEXT: [[REG345:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG345]] to i32
-// CHECK-NEXT: [[REG347:[0-9a-zA-Z_%.]+]] = lshr i32 [[REG344]], [[REG346]]
-// CHECK: [[REG348:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: store i32 [[REG347]], i32* [[REG348]], align 4
-
-// CHECK: define available_externally i64 @_mm_srl_pi16
-// CHECK: [[REG349:[0-9a-zA-Z_%.]+]] = icmp ule i64 {{[0-9a-zA-Z_%.]+}}, 15
-// CHECK-NEXT: br i1 [[REG349]], label %[[REG350:[0-9a-zA-Z_%.]+]], label %[[REG351:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG350]]
-// CHECK: [[REG352:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG353:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG352]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG353]], <8 x i16>* [[REG354:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG355:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG356:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG355]] to i16
-// CHECK-NEXT: [[REG357:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext [[REG356]])
-// CHECK-NEXT: store <8 x i16> [[REG357]], <8 x i16>* [[REG358:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG359:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG354]], align 16
-// CHECK-NEXT: [[REG360:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG358]], align 16
-// CHECK-NEXT: call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG359]], <8 x i16> noundef [[REG360]])
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* [[REG361:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: br label %[[REG362:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG351]]
-// CHECK-NEXT: store i64 0, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: br label %[[REG362]]
-// CHECK: [[REG362]]
-// CHECK-NEXT: [[REG363:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG361]], align 8
-// CHECK-NEXT: ret i64 [[REG363]]
-
-// CHECK: define available_externally i64 @_mm_srli_si64
-// CHECK: [[REG364:[0-9a-zA-Z_%.]+]] = zext i32 {{[0-9a-zA-Z_%.]+}} to i64
-// CHECK-NEXT: [[REG365:[0-9a-zA-Z_%.]+]] = lshr i64 {{[0-9a-zA-Z_%.]+}}, [[REG364]]
-// CHECK-NEXT: ret i64 [[REG365]]
-
-// CHECK: define available_externally i64 @_mm_srli_pi32
-// CHECK: [[REG366:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
-// CHECK-NEXT: [[REG367:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi32(i64 noundef {{[0-9a-zA-Z_%.]+}}, i64 noundef [[REG366]])
-// CHECK-NEXT: ret i64 [[REG367]]
-
-// CHECK: define available_externally i64 @_mm_srli_pi16
-// CHECK: [[REG366:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
-// CHECK-NEXT: [[REG368:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi16(i64 noundef {{[0-9a-zA-Z_%.]+}}, i64 noundef [[REG366]])
-// CHECK-NEXT: ret i64 [[REG368]]
+// CHECK-LABEL: define available_externally i64 @_mm_srl_si64
+// CHECK: lshr i64 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
+
+// CHECK-LABEL: define available_externally i64 @_mm_srl_pi32
+// CHECK: %[[TRUNC1:[0-9a-zA-Z_.]+]] = trunc i64 %{{[0-9a-zA-Z_.]+}} to i32
+// CHECK: lshr i32 %{{[0-9a-zA-Z_.]+}}, %[[TRUNC1]]
+// CHECK: %[[TRUNC2:[0-9a-zA-Z_.]+]] = trunc i64 %{{[0-9a-zA-Z_.]+}} to i32
+// CHECK: lshr i32 %{{[0-9a-zA-Z_.]+}}, %[[TRUNC2]]
+
+// CHECK-LABEL: define available_externally i64 @_mm_srl_pi16
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp ule i64 %{{[0-9a-zA-Z_.]+}}, 15
+// CHECK: br i1 %[[CMP]]
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: trunc i64 %{{[0-9a-zA-Z_.]+}} to i16
+// CHECK: call <8 x i16> @vec_splats(unsigned short)
+// CHECK: call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8])
+// CHECK: store i64 0, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+
+// CHECK-LABEL: define available_externally i64 @_mm_srli_si64
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: lshr i64 %{{[0-9a-zA-Z_.]+}}, %[[EXT]]
+
+// CHECK-LABEL: define available_externally i64 @_mm_srli_pi32
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: call i64 @_mm_srl_pi32(i64 noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %[[EXT]])
+
+// CHECK-LABEL: define available_externally i64 @_mm_srli_pi16
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: call i64 @_mm_srl_pi16(i64 noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %[[EXT]])
 
 void __attribute__((noinline))
 test_alt_name_srl() {
@@ -974,29 +654,23 @@ test_alt_name_srl() {
 
 // CHECK-LABEL: @test_alt_name_srl
 
-// CHECK: define available_externally i64 @_m_psrlq
-// CHECK: [[REG369:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_si64
-// CHECK-NEXT: ret i64 [[REG369]]
+// CHECK-LABEL: define available_externally i64 @_m_psrlq
+// CHECK: call i64 @_mm_srl_si64
 
-// CHECK: define available_externally i64 @_m_psrld
-// CHECK: [[REG370:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi32
-// CHECK-NEXT: ret i64 [[REG370]]
+// CHECK-LABEL: define available_externally i64 @_m_psrld
+// CHECK: call i64 @_mm_srl_pi32
 
-// CHECK: define available_externally i64 @_m_psrlw
-// CHECK: [[REG371:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi16
-// CHECK-NEXT: ret i64 [[REG371]]
+// CHECK-LABEL: define available_externally i64 @_m_psrlw
+// CHECK: call i64 @_mm_srl_pi16
 
-// CHECK: define available_externally i64 @_m_psrlqi
-// CHECK: [[REG372:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srli_si64
-// CHECK-NEXT: ret i64 [[REG372]]
+// CHECK-LABEL: define available_externally i64 @_m_psrlqi
+// CHECK: call i64 @_mm_srli_si64
 
-// CHECK: define available_externally i64 @_m_psrldi
-// CHECK: [[REG373:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srli_pi32
-// CHECK-NEXT: ret i64 [[REG373]]
+// CHECK-LABEL: define available_externally i64 @_m_psrldi
+// CHECK: call i64 @_mm_srli_pi32
 
-// CHECK: define available_externally i64 @_m_psrlwi
-// CHECK: [[REG374:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srli_pi16
-// CHECK-NEXT: ret i64 [[REG374]]
+// CHECK-LABEL: define available_externally i64 @_m_psrlwi
+// CHECK: call i64 @_mm_srli_pi16
 
 void __attribute__((noinline))
 test_sub() {
@@ -1011,94 +685,44 @@ test_sub() {
 
 // CHECK-LABEL: @test_sub
 
-// CHECK: define available_externally i64 @_mm_sub_pi32
-
-// CHECK-P8: [[REG375:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-P8-NEXT: [[REG376:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG375]], align 8
-// CHECK-P8: [[REG377:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-P8-NEXT: [[REG378:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG377]], align 8
-// CHECK-P8-NEXT: sub nsw i32 [[REG376]], [[REG378]]
-// CHECK-P8: [[REG379:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-P8-NEXT: [[REG380:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG379]], align 4
-// CHECK-P8: [[REG381:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-P8-NEXT: [[REG382:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG381]], align 4
-// CHECK-P8-NEXT: sub nsw i32 [[REG380]], [[REG382]]
-
-// CHECK-P9: [[REG383:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-P9-NEXT: [[REG384:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG383]] to <4 x i32>
-// CHECK-P9-NEXT: store <4 x i32> [[REG384]], <4 x i32>* [[REG385:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-P9: [[REG386:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-P9-NEXT: [[REG387:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG386]] to <4 x i32>
-// CHECK-P9-NEXT: store <4 x i32> [[REG387]], <4 x i32>* [[REG388:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-P9-NEXT: [[REG389:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG385]], align 16
-// CHECK-P9-NEXT: [[REG390:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG388]], align 16
-// CHECK-P9-NEXT: call <4 x i32> @vec_sub(int vector[4], int vector[4])(<4 x i32> noundef [[REG389]], <4 x i32> noundef [[REG390]])
-
-// CHECK: define available_externally i64 @_mm_sub_pi16
-// CHECK: [[REG391:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG392:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG391]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG392]], <8 x i16>* [[REG393:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG394:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG395:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG394]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG395]], <8 x i16>* [[REG396:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG397:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG393]], align 16
-// CHECK-NEXT: [[REG398:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG396]], align 16
-// CHECK-NEXT: call <8 x i16> @vec_sub(short vector[8], short vector[8])(<8 x i16> noundef [[REG397]], <8 x i16> noundef [[REG398]])
-
-// CHECK: define available_externally i64 @_mm_sub_pi8
-// CHECK: [[REG399:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG400:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG399]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG400]], <16 x i8>* [[REG401:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG402:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG403:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG402]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG403]], <16 x i8>* [[REG404:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG405:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG401]], align 16
-// CHECK-NEXT: [[REG406:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG404]], align 16
-// CHECK-NEXT: call <16 x i8> @vec_sub(signed char vector[16], signed char vector[16])(<16 x i8> noundef [[REG405]], <16 x i8> noundef [[REG406]])
-
-// CHECK: define available_externally i64 @_mm_subs_pi16
-// CHECK: [[REG407:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG408:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG407]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG408]], <8 x i16>* [[REG409:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG410:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG411:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG410]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG411]], <8 x i16>* [[REG412:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG413:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG409]], align 16
-// CHECK-NEXT: [[REG414:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG412]], align 16
-// CHECK-NEXT: call <8 x i16> @vec_subs(short vector[8], short vector[8])(<8 x i16> noundef [[REG413]], <8 x i16> noundef [[REG414]])
-
-// CHECK: define available_externally i64 @_mm_subs_pi8
-// CHECK: [[REG415:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG416:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG415]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG416]], <16 x i8>* [[REG417:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG418:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG419:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG418]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG419]], <16 x i8>* [[REG420:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG421:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG417]], align 16
-// CHECK-NEXT: [[REG422:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG420]], align 16
-// CHECK-NEXT: call <16 x i8> @vec_subs(signed char vector[16], signed char vector[16])(<16 x i8> noundef [[REG421]], <16 x i8> noundef [[REG422]])
-
-// CHECK: define available_externally i64 @_mm_subs_pu16
-// CHECK: [[REG423:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG424:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG423]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG424]], <8 x i16>* [[REG425:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG426:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG427:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG426]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG427]], <8 x i16>* [[REG428:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG429:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG425]], align 16
-// CHECK-NEXT: [[REG430:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG428]], align 16
-// CHECK-NEXT: call <8 x i16> @vec_subs(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG429]], <8 x i16> noundef [[REG430]])
-
-// CHECK: define available_externally i64 @_mm_subs_pu8
-// CHECK: [[REG431:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG432:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG431]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG432]], <16 x i8>* [[REG433:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG434:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG435:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG434]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG435]], <16 x i8>* [[REG436:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG437:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG433]], align 16
-// CHECK-NEXT: [[REG438:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG436]], align 16
-// CHECK-NEXT: call <16 x i8> @vec_subs(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG437]], <16 x i8> noundef [[REG438]])
+// CHECK-LABEL: define available_externally i64 @_mm_sub_pi32
+// CHECK-P8-COUNT-2: getelementptr inbounds [2 x i32], [2 x i32]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
+// CHECK-P8: sub nsw i32 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
+// CHECK-P8-COUNT-2: getelementptr inbounds [2 x i32], [2 x i32]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
+// CHECK-P8: sub nsw i32 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
+// CHECK-P9: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-P9: call <4 x i32> @vec_sub(int vector[4], int vector[4])
+
+// CHECK-LABEL: define available_externally i64 @_mm_sub_pi16
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <8 x i16> @vec_sub(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally i64 @_mm_sub_pi8
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <16 x i8> @vec_sub(signed char vector[16], signed char vector[16])
+
+// CHECK-LABEL: define available_externally i64 @_mm_subs_pi16
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <8 x i16> @vec_subs(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally i64 @_mm_subs_pi8
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <16 x i8> @vec_subs(signed char vector[16], signed char vector[16])
+
+// CHECK-LABEL: define available_externally i64 @_mm_subs_pu16
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <8 x i16> @vec_subs(unsigned short vector[8], unsigned short vector[8])
+
+// CHECK-LABEL: define available_externally i64 @_mm_subs_pu8
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <16 x i8> @vec_subs(unsigned char vector[16], unsigned char vector[16])
 
 void __attribute__((noinline))
 test_alt_name_sub() {
@@ -1113,33 +737,26 @@ test_alt_name_sub() {
 
 // CHECK-LABEL: @test_alt_name_sub
 
-// CHECK: define available_externally i64 @_m_psubd
-// CHECK: [[REG439:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sub_pi32
-// CHECK-NEXT: ret i64 [[REG439]]
+// CHECK-LABEL: define available_externally i64 @_m_psubd
+// CHECK: call i64 @_mm_sub_pi32
 
-// CHECK: define available_externally i64 @_m_psubw
-// CHECK: [[REG440:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sub_pi16
-// CHECK-NEXT: ret i64 [[REG440]]
+// CHECK-LABEL: define available_externally i64 @_m_psubw
+// CHECK: call i64 @_mm_sub_pi16
 
-// CHECK: define available_externally i64 @_m_psubb
-// CHECK: [[REG441:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sub_pi8
-// CHECK-NEXT: ret i64 [[REG441]]
+// CHECK-LABEL: define available_externally i64 @_m_psubb
+// CHECK: call i64 @_mm_sub_pi8
 
-// CHECK: define available_externally i64 @_m_psubsw
-// CHECK: [[REG442:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pi16
-// CHECK-NEXT: ret i64 [[REG442]]
+// CHECK-LABEL: define available_externally i64 @_m_psubsw
+// CHECK: call i64 @_mm_subs_pi16
 
-// CHECK: define available_externally i64 @_m_psubsb
-// CHECK: [[REG443:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pi8
-// CHECK-NEXT: ret i64 [[REG443]]
+// CHECK-LABEL: define available_externally i64 @_m_psubsb
+// CHECK: call i64 @_mm_subs_pi8
 
-// CHECK: define available_externally i64 @_m_psubusw
-// CHECK: [[REG444:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pu16
-// CHECK-NEXT: ret i64 [[REG444]]
+// CHECK-LABEL: define available_externally i64 @_m_psubusw
+// CHECK: call i64 @_mm_subs_pu16
 
-// CHECK: define available_externally i64 @_m_psubusb
-// CHECK: [[REG445:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pu8
-// CHECK-NEXT: ret i64 [[REG445]]
+// CHECK-LABEL: define available_externally i64 @_m_psubusb
+// CHECK: call i64 @_mm_subs_pu8
 
 void __attribute__((noinline))
 test_unpack() {
@@ -1153,87 +770,59 @@ test_unpack() {
 
 // CHECK-LABEL: @test_unpack
 
-// CHECK: define available_externally i64 @_mm_unpackhi_pi32
-// CHECK: [[REG446:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: [[REG447:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG446]], align 4
-// CHECK-NEXT: [[REG448:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* [[REG449:[0-9a-zA-Z_%.]+]] to [2 x i32]*
-// CHECK-NEXT: [[REG450:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* [[REG448]], i64 0, i64 0
-// CHECK-NEXT: store i32 [[REG447]], i32* [[REG450]], align 8
-// CHECK: [[REG451:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: [[REG452:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG451]], align 4
-// CHECK-NEXT: [[REG453:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* [[REG449]] to [2 x i32]*
-// CHECK-NEXT: [[REG454:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* [[REG453]], i64 0, i64 1
-// CHECK-NEXT: store i32 [[REG452]], i32* [[REG454]], align 4
-
-// CHECK: define available_externally i64 @_mm_unpackhi_pi16
-// CHECK: [[REG455:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
-// CHECK-NEXT: [[REG456:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG455]], align 4
-// CHECK: [[REG457:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: store i16 [[REG456]], i16* [[REG457]], align 8
-// CHECK: [[REG458:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
-// CHECK-NEXT: [[REG459:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG458]], align 4
-// CHECK: [[REG460:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: store i16 [[REG459]], i16* [[REG460]], align 2
-// CHECK: [[REG461:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
-// CHECK-NEXT: [[REG462:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG461]], align 2
-// CHECK: [[REG463:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
-// CHECK-NEXT: store i16 [[REG462]], i16* [[REG463]], align 4
-// CHECK: [[REG464:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
-// CHECK-NEXT: [[REG465:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG464]], align 2
-// CHECK: [[REG466:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
-// CHECK-NEXT: store i16 [[REG465]], i16* [[REG466]], align 2
-
-// CHECK: define available_externally i64 @_mm_unpackhi_pi8
-// CHECK: [[REG467:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: [[REG468:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG467]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG468]], <16 x i8>* [[REG469:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG470:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG471:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: [[REG472:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG471]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG472]], <16 x i8>* [[REG473:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG474:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG469]], align 16
-// CHECK-NEXT: [[REG475:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG473]], align 16
-// CHECK-NEXT: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG474]], <16 x i8> noundef [[REG475]])
-
-// CHECK: define available_externally i64 @_mm_unpacklo_pi32
-// CHECK: [[REG476:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: [[REG477:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG476]], align 8
-// CHECK: [[REG478:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: store i32 [[REG477]], i32* [[REG478]], align 8
-// CHECK: [[REG479:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: [[REG480:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG479]], align 8
-// CHECK: [[REG481:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: store i32 [[REG480]], i32* [[REG481]], align 4
-
-// CHECK: define available_externally i64 @_mm_unpacklo_pi16
-// CHECK: [[REG482:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: [[REG483:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG482]], align 8
-// CHECK: [[REG484:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: store i16 [[REG483]], i16* [[REG484]], align 8
-// CHECK: [[REG485:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
-// CHECK-NEXT: [[REG486:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG485]], align 8
-// CHECK: [[REG487:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: store i16 [[REG486]], i16* [[REG487]], align 2
-// CHECK: [[REG488:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: [[REG489:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG488]], align 2
-// CHECK: [[REG490:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
-// CHECK-NEXT: store i16 [[REG489]], i16* [[REG490]], align 4
-// CHECK: [[REG491:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
-// CHECK-NEXT: [[REG492:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG491]], align 2
-// CHECK: [[REG493:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
-// CHECK-NEXT: store i16 [[REG492]], i16* [[REG493]], align 2
-
-// CHECK: define available_externally i64 @_mm_unpacklo_pi8
-// CHECK: [[REG494:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: [[REG495:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG494]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG495]], <16 x i8>* [[REG496:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG497:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG498:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: [[REG499:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG498]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG499]], <16 x i8>* [[REG500:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG501:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG496]], align 16
-// CHECK-NEXT: [[REG502:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG500]], align 16
-// CHECK-NEXT: [[REG503:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])
+// CHECK-LABEL: define available_externally i64 @_mm_unpackhi_pi32
+// CHECK: getelementptr inbounds [2 x i32], [2 x i32]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
+// CHECK: %[[ADDR1:[0-9a-zA-Z_.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
+// CHECK: store i32 %{{[0-9a-zA-Z_.]+}}, i32* %[[ADDR1]], align 8
+// CHECK: getelementptr inbounds [2 x i32], [2 x i32]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
+// CHECK: %[[ADDR2:[0-9a-zA-Z_.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
+// CHECK: store i32 %{{[0-9a-zA-Z_.]+}}, i32* %[[ADDR2]], align 4
+
+// CHECK-LABEL: define available_externally i64 @_mm_unpackhi_pi16
+// CHECK: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
+// CHECK: %[[ADDR1:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
+// CHECK: store i16 %{{[0-9a-zA-Z_.]+}}, i16* %[[ADDR1]], align 8
+// CHECK: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
+// CHECK: %[[ADDR2:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
+// CHECK: store i16 %{{[0-9a-zA-Z_.]+}}, i16* %[[ADDR2]], align 2
+// CHECK: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
+// CHECK: %[[ADDR3:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
+// CHECK: store i16 %{{[0-9a-zA-Z_.]+}}, i16* %[[ADDR3]], align 4
+// CHECK: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
+// CHECK: %[[ADDR4:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
+// CHECK: store i16 %{{[0-9a-zA-Z_.]+}}, i16* %[[ADDR4]], align 2
+
+// CHECK-LABEL: define available_externally i64 @_mm_unpackhi_pi8
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])
+
+// CHECK-LABEL: define available_externally i64 @_mm_unpacklo_pi32
+// CHECK: getelementptr inbounds [2 x i32], [2 x i32]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
+// CHECK: %[[ADDR1:[0-9a-zA-Z_.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
+// CHECK: store i32 %{{[0-9a-zA-Z_.]+}}, i32* %[[ADDR1]], align 8
+// CHECK: getelementptr inbounds [2 x i32], [2 x i32]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
+// CHECK: %[[ADDR2:[0-9a-zA-Z_.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
+// CHECK: store i32 %{{[0-9a-zA-Z_.]+}}, i32* %[[ADDR2]], align 4
+
+// CHECK-LABEL: define available_externally i64 @_mm_unpacklo_pi16
+// CHECK: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
+// CHECK: %[[ADDR1:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
+// CHECK: store i16 %{{[0-9a-zA-Z_.]+}}, i16* %[[ADDR1]], align 8
+// CHECK: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
+// CHECK: %[[ADDR2:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
+// CHECK: store i16 %{{[0-9a-zA-Z_.]+}}, i16* %[[ADDR2]], align 2
+// CHECK: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
+// CHECK: %[[ADDR3:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
+// CHECK: store i16 %{{[0-9a-zA-Z_.]+}}, i16* %[[ADDR3]], align 4
+// CHECK: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
+// CHECK: %[[ADDR4:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
+// CHECK: store i16 %{{[0-9a-zA-Z_.]+}}, i16* %[[ADDR4]], align 2
+
+// CHECK-LABEL: define available_externally i64 @_mm_unpacklo_pi8
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])
 
 void __attribute__((noinline))
 test_alt_name_unpack() {
@@ -1247,26 +836,20 @@ test_alt_name_unpack() {
 
 // CHECK-LABEL: @test_alt_name_unpack
 
-// CHECK: define available_externally i64 @_m_punpckhdq
-// CHECK: [[REG238:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpackhi_pi32
-// CHECK-NEXT: ret i64 [[REG238]]
+// CHECK-LABEL: define available_externally i64 @_m_punpckhdq
+// CHECK: call i64 @_mm_unpackhi_pi32
 
-// CHECK: define available_externally i64 @_m_punpckhwd
-// CHECK: [[REG239:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpackhi_pi16
-// CHECK-NEXT: ret i64 [[REG239]]
+// CHECK-LABEL: define available_externally i64 @_m_punpckhwd
+// CHECK: call i64 @_mm_unpackhi_pi16
 
-// CHECK: define available_externally i64 @_m_punpckhbw
-// CHECK: [[REG240:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpackhi_pi8
-// CHECK-NEXT: ret i64 [[REG240]]
+// CHECK-LABEL: define available_externally i64 @_m_punpckhbw
+// CHECK: call i64 @_mm_unpackhi_pi8
 
-// CHECK: define available_externally i64 @_m_punpckldq
-// CHECK: [[REG241:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpacklo_pi32
-// CHECK-NEXT: ret i64 [[REG241]]
+// CHECK-LABEL: define available_externally i64 @_m_punpckldq
+// CHECK: call i64 @_mm_unpacklo_pi32
 
-// CHECK: define available_externally i64 @_m_punpcklwd
-// CHECK: [[REG242:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpacklo_pi16
-// CHECK-NEXT: ret i64 [[REG242]]
+// CHECK-LABEL: define available_externally i64 @_m_punpcklwd
+// CHECK: call i64 @_mm_unpacklo_pi16
 
-// CHECK: define available_externally i64 @_m_punpcklbw
-// CHECK: [[REG243:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpacklo_pi8
-// CHECK-NEXT: ret i64 [[REG243]]
+// CHECK-LABEL: define available_externally i64 @_m_punpcklbw
+// CHECK: call i64 @_mm_unpacklo_pi8

diff  --git a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c
index 265ba0d8284a7..5b2027dd52197 100644
--- a/clang/test/CodeGen/PowerPC/ppc-smmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: powerpc-registered-target
 
 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
@@ -13,6 +12,8 @@
 
 #include <smmintrin.h>
 
+__m128 mn1, mn2;
+__m128d md1, md2;
 __m128i mi, m1, m2;
 
 void __attribute__((noinline))
@@ -25,46 +26,23 @@ test_extract() {
 
 // CHECK-LABEL: @test_extract
 
-// CHECK: define available_externally signext i32 @_mm_extract_epi8(<2 x i64> noundef [[REG1:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG2:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1]], <2 x i64>* [[REG3:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG2]], i32* [[REG4:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG5:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG3]], align 16
-// CHECK-NEXT: [[REG6:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG5]] to <16 x i8>
-// CHECK-NEXT: [[REG7:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG4]], align 4
-// CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = and i32 [[REG7]], 15
-// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = extractelement <16 x i8> [[REG6]], i32 [[REG8]]
-// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = zext i8 [[REG9]] to i32
-// CHECK-NEXT: ret i32 [[REG10]]
-
-// CHECK: define available_externally signext i32 @_mm_extract_epi32(<2 x i64> noundef [[REG11:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG12:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG11]], <2 x i64>* [[REG13:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG12]], i32* [[REG14:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG13]], align 16
-// CHECK-NEXT: [[REG16:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG15]] to <4 x i32>
-// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG14]], align 4
-// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = and i32 [[REG17]], 3
-// CHECK-NEXT: [[REG19:[0-9a-zA-Z_%.]+]] = extractelement <4 x i32> [[REG16]], i32 [[REG18]]
-// CHECK-NEXT: ret i32 [[REG19]]
-
-// CHECK: define available_externally signext i32 @_mm_extract_epi64(<2 x i64> noundef [[REG20:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG21:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG20]], <2 x i64>* [[REG22:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG21]], i32* [[REG23:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG24:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG22]], align 16
-// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG23]], align 4
-// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = and i32 [[REG25]], 1
-// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG24]], i32 [[REG26]]
-// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG27]] to i32
-// CHECK-NEXT: ret i32 [[REG28]]
-
-// CHECK: define available_externally signext i32 @_mm_extract_ps(<4 x float> noundef [[REG29:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG30:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG29]], <4 x float>* [[REG31:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG30]], i32* [[REG32:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG31]], align 16
-// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG33]] to <4 x i32>
-// CHECK-NEXT: [[REG35:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG32]], align 4
-// CHECK-NEXT: [[REG36:[0-9a-zA-Z_%.]+]] = and i32 [[REG35]], 3
-// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = extractelement <4 x i32> [[REG34]], i32 [[REG36]]
-// CHECK-NEXT: ret i32 [[REG37]]
+// CHECK-LABEL: define available_externally signext i32 @_mm_extract_epi8(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 15
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <16 x i8> %{{[0-9a-zA-Z_.]+}}, i32 %[[AND]]
+// CHECK: zext i8 %[[EXT]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_extract_epi32(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
+// CHECK: extractelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[AND]]
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_extract_epi64(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 1
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 %[[AND]]
+// CHECK: trunc i64 %[[EXT]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_extract_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
+// CHECK: extractelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[AND]]
 
 void __attribute__((noinline))
 test_blend() {
@@ -74,53 +52,20 @@ test_blend() {
 
 // CHECK-LABEL: @test_blend
 
-// CHECK: define available_externally <2 x i64> @_mm_blend_epi16(<2 x i64> noundef [[REG38:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG39:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG40:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG38]], <2 x i64>* [[REG41:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG39]], <2 x i64>* [[REG42:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG40]], i32* [[REG43:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG44:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG43]], align 4
-// CHECK-NEXT: [[REG45:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG44]] to i8
-// CHECK-NEXT: [[REG46:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_splats(signed char)(i8 noundef signext [[REG45]])
-// CHECK-NEXT: store <16 x i8> [[REG46]], <16 x i8>* [[REG47:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG48:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG47]], align 16
-// CHECK-NEXT: [[REG49:[0-9a-zA-Z_%.]+]] = call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> [[REG48]])
-// CHECK-NEXT: store <16 x i8> [[REG49]], <16 x i8>* [[REG47]], align 16
-// CHECK-NEXT: [[REG50:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG47]], align 16
-// CHECK-NEXT: [[REG51:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_unpackh(signed char vector[16])(<16 x i8> noundef [[REG50]])
-// CHECK-NEXT: store <8 x i16> [[REG51]], <8 x i16>* [[REG52:[0-9a-zA-Z_%.]+]], align 16
-
-// BE: [[REG53:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG52]], align 16
-// BE-NEXT: [[REG54:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_reve(unsigned short vector[8])(<8 x i16> [[REG53]])
-// BE-NEXT: store <8 x i16> [[REG54]], <8 x i16>* [[REG52]], align 16
-
-// CHECK: [[REG55:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG41]], align 16
-// CHECK-NEXT: [[REG56:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG55]] to <8 x i16>
-// CHECK-NEXT: [[REG57:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG42]], align 16
-// CHECK-NEXT: [[REG58:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG57]] to <8 x i16>
-// CHECK-NEXT: [[REG59:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG52]], align 16
-// CHECK-NEXT: [[REG60:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG56]], <8 x i16> noundef [[REG58]], <8 x i16> noundef [[REG59]])
-// CHECK-NEXT: [[REG61:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG60]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG61]]
-
-// CHECK: define available_externally <2 x i64> @_mm_blendv_epi8(<2 x i64> noundef [[REG62:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG63:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG64:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG62]], <2 x i64>* [[REG65:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG63]], <2 x i64>* [[REG66:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG64]], <2 x i64>* [[REG67:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG68:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext 7)
-// CHECK-NEXT: store <16 x i8> [[REG68]], <16 x i8>* [[REG69:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG67]], align 16
-// CHECK-NEXT: [[REG71:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG70]] to <16 x i8>
-// CHECK-NEXT: [[REG72:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG69]], align 16
-// CHECK-NEXT: [[REG73:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sra(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG71]], <16 x i8> noundef [[REG72]])
-// CHECK-NEXT: store <16 x i8> [[REG73]], <16 x i8>* [[REG74:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG75:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG65]], align 16
-// CHECK-NEXT: [[REG76:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG75]] to <16 x i8>
-// CHECK-NEXT: [[REG77:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG66]], align 16
-// CHECK-NEXT: [[REG78:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG77]] to <16 x i8>
-// CHECK-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG74]], align 16
-// CHECK-NEXT: [[REG80:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG76]], <16 x i8> noundef [[REG78]], <16 x i8> noundef [[REG79]])
-// CHECK-NEXT: [[REG81:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG80]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG81]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_blend_epi16(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
+// CHECK: call <16 x i8> @vec_splats(signed char)(i8 noundef signext %[[TRUNC]])
+// CHECK: call <16 x i8> @llvm.ppc.altivec.vgbbd(<16 x i8> %{{[0-9a-zA-Z_.]+}})
+// CHECK: %[[PACK:[0-9a-zA-Z_.]+]] = call <8 x i16> @vec_unpackh(signed char vector[16])
+// CHECK: store <8 x i16> %[[PACK]], <8 x i16>* %{{[0-9a-zA-Z_.]+}}, align 16
+// BE: %[[REVE:[0-9a-zA-Z_.]+]] = call <8 x i16> @vec_reve(unsigned short vector[8])
+// BE: store <8 x i16> %[[REVE]], <8 x i16>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], unsigned short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_blendv_epi8(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext 7)
+// CHECK: call <16 x i8> @vec_sra(unsigned char vector[16], unsigned char vector[16])
+// CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], unsigned char vector[16])
 
 void __attribute__((noinline))
 test_insert() {
@@ -131,25 +76,18 @@ test_insert() {
 
 // CHECK-LABEL: @test_insert
 
-// CHECK: define available_externally <2 x i64> @_mm_insert_epi8(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef signext {{[0-9a-zA-Z_%.]+}}, i32 noundef signext {{[0-9a-zA-Z_%.]+}})
-// CHECK: %{{[0-9a-zA-Z_.]+}} = bitcast <2 x i64> %{{[0-9a-zA-Z_.]+}} to <16 x i8>
-// CHECK: %[[R0:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
-// CHECK: %[[R1:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 15
-// CHECK: %{{[0-9a-zA-Z_.]+}} = insertelement <16 x i8> %{{[0-9a-zA-Z_.]+}}, i8 %[[R0]], i32 %[[R1]]
-// CHECK: %[[R2:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
-// CHECK: ret <2 x i64> %[[R2]]
-
-// CHECK: define available_externally <2 x i64> @_mm_insert_epi32(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef signext {{[0-9a-zA-Z_%.]+}}, i32 noundef signext {{[0-9a-zA-Z_%.]+}})
-// CHECK: %{{[0-9a-zA-Z_.]+}} = bitcast <2 x i64> %{{[0-9a-zA-Z_.]+}} to <4 x i32>
-// CHECK: %[[R0:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
-// CHECK: %{{[0-9a-zA-Z_.]+}} = insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 %[[R0]]
-// CHECK: %[[R1:[0-9a-zA-Z_.]+]] = bitcast <4 x i32> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
-// CHECK: ret <2 x i64> %[[R1]]
-
-// CHECK: define available_externally <2 x i64> @_mm_insert_epi64(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, i64 noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef signext {{[0-9a-zA-Z_%.]+}})
-// CHECK: %[[R0:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 1
-// CHECK: %{{[0-9a-zA-Z_.]+}} = insertelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i64 %{{[0-9a-zA-Z_.]+}}, i32 %[[R0:[0-9a-zA-Z_.]+]]
-// CHECK: ret <2 x i64> %{{[0-9a-zA-Z_.]+}}
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_insert_epi8(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 15
+// CHECK: insertelement <16 x i8> %{{[0-9a-zA-Z_.]+}}, i8 %[[TRUNC]], i32 %[[AND]]
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_insert_epi32(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
+// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 %[[AND]]
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_insert_epi64(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 1
+// CHECK: insertelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i64 %{{[0-9a-zA-Z_.]+}}, i32 %[[AND:[0-9a-zA-Z_.]+]]
 
 // To test smmintrin.h includes tmmintrin.h
 
@@ -160,4 +98,4 @@ test_abs_ssse3() {
 
 // CHECK-LABEL: @test_abs_ssse3
 
-// CHECK: define available_externally <2 x i64> @_mm_abs_epi16(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}})
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_abs_epi16(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}})

diff  --git a/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c
index 62e830f6b3aae..f4af5174e60e4 100644
--- a/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: powerpc-registered-target
 
 // RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
@@ -28,77 +27,29 @@ test_abs() {
 
 // CHECK-LABEL: @test_abs
 
-// CHECK: define available_externally <2 x i64> @_mm_abs_epi16(<2 x i64> noundef [[REG1:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG1]], <2 x i64>* [[REG2:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG3:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG2]], align 16
-// CHECK-NEXT: [[REG4:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG3]] to <8 x i16>
-// CHECK-NEXT: [[REG5:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_abs(short vector[8])(<8 x i16> noundef [[REG4]])
-// CHECK-NEXT: [[REG6:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG5]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG6]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_abs_epi16
+// CHECK: call <8 x i16> @vec_abs(short vector[8])
 
-// CHECK: define available_externally <2 x i64> @_mm_abs_epi32(<2 x i64> noundef [[REG7:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG7]], <2 x i64>* [[REG8:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG8]], align 16
-// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG9]] to <4 x i32>
-// CHECK-NEXT: [[REG11:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_abs(int vector[4])(<4 x i32> noundef [[REG10]])
-// CHECK-NEXT: [[REG12:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG11]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG12]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_abs_epi32
+// CHECK: call <4 x i32> @vec_abs(int vector[4])
 
-// CHECK: define available_externally <2 x i64> @_mm_abs_epi8(<2 x i64> noundef [[REG13:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG13]], <2 x i64>* [[REG14:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG14]], align 16
-// CHECK-NEXT: [[REG16:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG15]] to <16 x i8>
-// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_abs(signed char vector[16])(<16 x i8> noundef [[REG16]])
-// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG17]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG18]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_abs_epi8
+// CHECK: call <16 x i8> @vec_abs(signed char vector[16])
 
-// CHECK: define available_externally i64 @_mm_abs_pi16(i64 noundef [[REG19:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG19]], i64* [[REG20:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG20]], align 8
-// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG21]], i32 0
-// CHECK-NEXT: [[REG23:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG20]], align 8
-// CHECK-NEXT: [[REG24:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG22]], i64 [[REG23]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG24]], <2 x i64>* [[REG25:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG25]], align 16
-// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG26]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG27]], <8 x i16>* [[REG28:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG29:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG28]], align 16
-// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_abs(short vector[8])(<8 x i16> noundef [[REG29]])
-// CHECK-NEXT: [[REG31:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG30]] to <2 x i64>
-// CHECK-NEXT: [[REG32:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG31]], i32 0
-// CHECK-NEXT: ret i64 [[REG32]]
+// CHECK-LABEL: define available_externally i64 @_mm_abs_pi16
+// CHECK: %[[ABS:[0-9a-zA-Z_.]+]] = call <8 x i16> @vec_abs(short vector[8])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %[[ABS]] to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0
 
-// CHECK: define available_externally i64 @_mm_abs_pi32(i64 noundef [[REG33:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG33]], i64* [[REG34:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG35:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG34]], align 8
-// CHECK-NEXT: [[REG36:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG35]], i32 0
-// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG34]], align 8
-// CHECK-NEXT: [[REG38:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG36]], i64 [[REG37]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG38]], <2 x i64>* [[REG39:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG40:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG39]], align 16
-// CHECK-NEXT: [[REG41:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG40]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG41]], <4 x i32>* [[REG42:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG43:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG42]], align 16
-// CHECK-NEXT: [[REG44:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_abs(int vector[4])(<4 x i32> noundef [[REG43]])
-// CHECK-NEXT: [[REG45:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG44]] to <2 x i64>
-// CHECK-NEXT: [[REG46:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG45]], i32 0
-// CHECK-NEXT: ret i64 [[REG46]]
+// CHECK-LABEL: define available_externally i64 @_mm_abs_pi32
+// CHECK: %[[ABS:[0-9a-zA-Z_.]+]] = call <4 x i32> @vec_abs(int vector[4])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <4 x i32> %[[ABS]] to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0
 
-// CHECK: define available_externally i64 @_mm_abs_pi8(i64 noundef [[REG47:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG47]], i64* [[REG48:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG49:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG48]], align 8
-// CHECK-NEXT: [[REG50:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG49]], i32 0
-// CHECK-NEXT: [[REG51:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG48]], align 8
-// CHECK-NEXT: [[REG52:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG50]], i64 [[REG51]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG52]], <2 x i64>* [[REG53:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG54:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG53]], align 16
-// CHECK-NEXT: [[REG55:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG54]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG55]], <16 x i8>* [[REG56:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG57:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG56]], align 16
-// CHECK-NEXT: [[REG58:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_abs(signed char vector[16])(<16 x i8> noundef [[REG57]])
-// CHECK-NEXT: [[REG59:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG58]] to <2 x i64>
-// CHECK-NEXT: [[REG60:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG59]], i32 0
-// CHECK-NEXT: ret i64 [[REG60]]
+// CHECK-LABEL: define available_externally i64 @_mm_abs_pi8
+// CHECK: %[[ABS:[0-9a-zA-Z_.]+]] = call <16 x i8> @vec_abs(signed char vector[16])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %[[ABS]] to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0
 
 void __attribute__((noinline))
 test_alignr() {
@@ -108,141 +59,45 @@ test_alignr() {
 
 // CHECK-LABEL: @test_alignr
 
-// CHECK: define available_externally <2 x i64> @_mm_alignr_epi8(<2 x i64> noundef [[REG61:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG62:[0-9a-zA-Z_%.]+]], i32 noundef zeroext [[REG63:[0-9a-zA-Z_%.]+]])
-// CHECK: [[REG64:[0-9a-zA-Z_%.]+]] = alloca i32, align 4
-// CHECK: store <2 x i64> [[REG61]], <2 x i64>* [[REG65:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG62]], <2 x i64>* [[REG66:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG63]], i32* [[REG64:[0-9a-zA-Z_%.]+]], align 4
-// CHECK: %[[REG64b:[0-9a-zA-Z_%.]+]] = call i1 @llvm.is.constant.i32(i32 %0)
-// CHECK-NEXT: br i1 %[[REG64b]], label %[[REG67:[0-9a-zA-Z_%.]+]], label %[[REG68:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG67]]:
-// CHECK-NEXT: [[REG69:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG64]], align 4
-// CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = icmp ult i32 [[REG69]], 16
-// CHECK-NEXT: br i1 [[REG70]], label %[[REG71:[0-9a-zA-Z_%.]+]], label %[[REG68:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG71]]:
-// CHECK-BE-NEXT: load <2 x i64>, <2 x i64>* [[REG66]], align 16
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_alignr_epi8
+// CHECK: %[[CONST:[0-9a-zA-Z_.]+]] = call i1 @llvm.is.constant.i32(i32 %0)
+// CHECK: br i1 %[[CONST]]
 // CHECK-BE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)
-// CHECK-LE-NEXT: load <2 x i64>, <2 x i64>* [[REG65]], align 16
 // CHECK-LE: call <16 x i8> @vec_reve(unsigned char vector[16])
 // CHECK-LE: call <16 x i8> @vec_reve(unsigned char vector[16])
 // CHECk-LE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)
 // CHECK-LE: call <16 x i8> @vec_reve(unsigned char vector[16])
-// CHECK: br label %[[REG72:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG68]]:
-// CHECK-NEXT: [[REG73:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG64]], align 4
-// CHECK-NEXT: [[REG74:[0-9a-zA-Z_%.]+]] = icmp eq i32 [[REG73]], 0
-// CHECK-NEXT: br i1 [[REG74]], label %[[REG75:[0-9a-zA-Z_%.]+]], label %[[REG76:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG75]]:
-// CHECK-NEXT: [[REG77:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG66]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG77]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: br label %[[REG72:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG76]]:
-// CHECK-NEXT: [[REG78:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG64]], align 4
-// CHECK-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = icmp uge i32 [[REG78]], 16
-// CHECK-NEXT: br i1 [[REG79]], label %[[REG80:[0-9a-zA-Z_%.]+]], label %[[REG81:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG80]]:
-// CHECK-NEXT: [[REG82:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG64]], align 4
-// CHECK-NEXT: [[REG83:[0-9a-zA-Z_%.]+]] = icmp uge i32 [[REG82]], 32
-// CHECK-NEXT: br i1 [[REG83]], label %[[REG84:[0-9a-zA-Z_%.]+]], label %[[REG85:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG84]]:
-// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[REG86:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: br label %[[REG72:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG85]]:
-// CHECK-NEXT: [[REG87:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG64]], align 4
-// CHECK-NEXT: [[REG88:[0-9a-zA-Z_%.]+]] = sub i32 [[REG87]], 16
-// CHECK-NEXT: [[REG89:[0-9a-zA-Z_%.]+]] = mul i32 [[REG88]], 8
-// CHECK-NEXT: [[REG90:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG89]] to i8
-// CHECK-NEXT: [[REG91:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext [[REG90]])
-// CHECK-NEXT: store <16 x i8> [[REG91]], <16 x i8>* [[REG92:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG93:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG65]], align 16
-// CHECK-NEXT: [[REG94:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG93]] to <16 x i8>
-// CHECK-NEXT: [[REG95:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG92]], align 16
-// CHECK-BE-NEXT: [[REG96:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])
-// CHECK-LE-NEXT: [[REG96:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])
-// CHECK-NEXT: [[REG97:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG96]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG97]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: br label %[[REG72:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG81]]:
-// CHECK-NEXT: [[REG98:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG64]], align 4
-// CHECK-NEXT: [[REG99:[0-9a-zA-Z_%.]+]] = sub i32 16, [[REG98]]
-// CHECK-NEXT: [[REG100:[0-9a-zA-Z_%.]+]] = mul i32 [[REG99]], 8
-
-// CHECK-BE: [[REG101:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG100]] to i8
-// CHECK-BE: [[REG102:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext [[REG101]])
-// CHECK-BE: mul i32 {{[0-9a-zA-Z_%.]+}}, 8
+// CHECK: store <16 x i8> zeroinitializer, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <2 x i64> zeroinitializer, <2 x i64>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: %[[SUB:[0-9a-zA-Z_.]+]] = sub i32 %{{[0-9a-zA-Z_.]+}}, 16
+// CHECK: %[[MUL:[0-9a-zA-Z_.]+]] = mul i32 %[[SUB]], 8
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[MUL]] to i8
+// CHECK: call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext %[[TRUNC]])
+// CHECK-BE: call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])
+// CHECK-LE: call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])
+// CHECK: %[[SUB2:[0-9a-zA-Z_.]+]] = sub i32 16, %{{[0-9a-zA-Z_.]+}}
+// CHECK: %[[MUL2:[0-9a-zA-Z_.]+]] = mul i32 %[[SUB2]], 8
+// CHECK-BE: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[MUL2]] to i8
+// CHECK-BE: call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext %[[TRUNC]])
+// CHECK-BE: mul i32 %{{[0-9a-zA-Z_.]+}}, 8
 // CHECK-BE: call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])
 // CHECK-BE: call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])
-// CHECK-BE: [[REG103:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_or(unsigned char vector[16], unsigned char vector[16])
-// CHECK-BE-NEXT: [[REG104:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG103]] to <2 x i64>
-// CHECK-BE-NEXT: store <2 x i64> [[REG104]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-BE-NEXT: br label %[[REG72:[0-9a-zA-Z_%.]+]]
-
-// CHECK-LE: [[REG105:[0-9a-zA-Z_%.]+]] = mul i32 {{[0-9a-zA-Z_%.]+}}, 8
-// CHECK-LE-NEXT: trunc i32 [[REG105]] to i8
+// CHECK-BE: call <16 x i8> @vec_or(unsigned char vector[16], unsigned char vector[16])
+// CHECK-LE: %[[MUL3:[0-9a-zA-Z_.]+]] = mul i32 %{{[0-9a-zA-Z_.]+}}, 8
+// CHECK-LE: trunc i32 %[[MUL3]] to i8
 // CHECK-LE: call <16 x i8> @vec_splats(unsigned char)
 // CHECK-LE: call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])
 // CHECK-LE: call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])
-// CHECK-LE: [[REG106:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_or(unsigned char vector[16], unsigned char vector[16])
-// CHECK-LE-NEXT: [[REG107:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG106]] to <2 x i64>
-// CHECK-LE-NEXT: store <2 x i64> [[REG107]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-
-// CHECK: [[REG72]]:
-// CHECK-NEXT: [[REG108:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <2 x i64> [[REG108]]
+// CHECK-LE: call <16 x i8> @vec_or(unsigned char vector[16], unsigned char vector[16])
 
-// CHECK: define available_externally i64 @_mm_alignr_pi8(i64 noundef [[REG109:[0-9a-zA-Z_%.]+]], i64 noundef [[REG110:[0-9a-zA-Z_%.]+]], i32 noundef zeroext [[REG111:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG109]], i64* [[REG112:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG110]], i64* [[REG113:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i32 [[REG111]], i32* [[REG114:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG115:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG114]], align 4
-// CHECK-NEXT: [[REG116:[0-9a-zA-Z_%.]+]] = icmp ult i32 [[REG115]], 16
-// CHECK-NEXT: br i1 [[REG116]], label %[[REG117:[0-9a-zA-Z_%.]+]], label %[[REG118:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG117]]:
-// CHECK-NEXT: [[REG119:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG113]], align 8
-// CHECK-NEXT: [[REG120:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG119]], i32 0
-// CHECK-NEXT: [[REG121:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG112]], align 8
-// CHECK-NEXT: [[REG122:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG120]], i64 [[REG121]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG122]], <2 x i64>* [[REG123:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG124:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG114]], align 4
-// CHECK-NEXT: [[REG125:[0-9a-zA-Z_%.]+]] = shl i32 [[REG124]], 3
-// CHECK-BE-NEXT: [[REG126:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, i32 [[REG125]], i32 3
-// CHECK-LE-NEXT: [[REG127:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> undef, i32 [[REG125]], i32 0
-// CHECK-LE-NEXT: [[REG128:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG127]], i32 0, i32 1
-// CHECK-LE-NEXT: [[REG129:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG128]], i32 0, i32 2
-// CHECK-LE-NEXT: [[REG126:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG129]], i32 0, i32 3
-// CHECK-NEXT: store <4 x i32> [[REG126]], <4 x i32>* [[REG130:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG131:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG123]], align 16
-// CHECK-NEXT: [[REG132:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG131]] to <16 x i8>
-// CHECK-NEXT: [[REG133:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG130]], align 16
-// CHECK-NEXT: [[REG134:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG133]] to <16 x i8>
-// CHECK-BE-NEXT: [[REG135:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG132]], <16 x i8> noundef [[REG134]])
-// CHECK-LE-NEXT: [[REG135:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG132]], <16 x i8> noundef [[REG134]])
-// CHECK-NEXT: [[REG136:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG135]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG136]], <2 x i64>* [[REG123]], align 16
-// CHECK-NEXT: [[REG137:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG123]], align 16
-// CHECK-NEXT: [[REG138:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG137]], i32 0
-// CHECK-NEXT: store i64 [[REG138]], i64* [[REG139:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: br label %[[REG140:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG118]]:
-// CHECK-NEXT: store i64 0, i64* [[REG141:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 0, i64* [[REG139]], align 8
-// CHECK-NEXT: br label %[[REG140:[0-9a-zA-Z_%.]+]]
-
-// CHECK: [[REG140]]:
-// CHECK-NEXT: [[REG142:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG139]], align 8
-// CHECK-NEXT: ret i64 [[REG142]]
+// CHECK-LABEL: define available_externally i64 @_mm_alignr_pi8
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp ult i32 %{{[0-9a-zA-Z_.]+}}, 16
+// CHECK: br i1 %[[CMP]]
+// CHECK-BE: call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])
+// CHECK-LE: call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])
+// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: store i64 0, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: store i64 0, i64* %{{[0-9a-zA-Z_.]+}}, align 8
 
 void __attribute__((noinline))
 test_hadd() {
@@ -256,159 +111,50 @@ test_hadd() {
 
 // CHECK-LABEL: @test_hadd
 
-// CHECK: define available_externally <2 x i64> @_mm_hadd_epi16(<2 x i64> noundef [[REG143:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG144:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG143]], <2 x i64>* [[REG145:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG144]], <2 x i64>* [[REG146:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, <16 x i8>* [[REG147:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, <16 x i8>* [[REG148:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG149:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG145]], align 16
-// CHECK-NEXT: [[REG150:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG149]] to <8 x i16>
-// CHECK-NEXT: [[REG151:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG146]], align 16
-// CHECK-NEXT: [[REG152:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG151]] to <8 x i16>
-// CHECK-NEXT: [[REG153:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG150]], <8 x i16> noundef [[REG152]], <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
-// CHECK-NEXT: store <8 x i16> [[REG153]], <8 x i16>* [[REG154:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG155:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG145]], align 16
-// CHECK-NEXT: [[REG156:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG155]] to <8 x i16>
-// CHECK-NEXT: [[REG157:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG146]], align 16
-// CHECK-NEXT: [[REG158:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG157]] to <8 x i16>
-// CHECK-NEXT: [[REG159:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG156]], <8 x i16> noundef [[REG158]], <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
-// CHECK-NEXT: store <8 x i16> [[REG159]], <8 x i16>* [[REG160:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG161:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG154]], align 16
-// CHECK-NEXT: [[REG162:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG160]], align 16
-// CHECK-NEXT: [[REG163:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_add(short vector[8], short vector[8])(<8 x i16> noundef [[REG161]], <8 x i16> noundef [[REG162]])
-// CHECK-NEXT: [[REG164:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG163]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG164]]
-
-// CHECK: define available_externally <2 x i64> @_mm_hadd_epi32(<2 x i64> noundef [[REG165:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG166:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG165]], <2 x i64>* [[REG167:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG166]], <2 x i64>* [[REG168:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>, <16 x i8>* [[REG169:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>, <16 x i8>* [[REG170:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG171:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG167]], align 16
-// CHECK-NEXT: [[REG172:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG171]] to <4 x i32>
-// CHECK-NEXT: [[REG173:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG168]], align 16
-// CHECK-NEXT: [[REG174:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG173]] to <4 x i32>
-// CHECK-NEXT: [[REG175:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef [[REG172]], <4 x i32> noundef [[REG174]], <16 x i8> noundef <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>)
-// CHECK-NEXT: store <4 x i32> [[REG175]], <4 x i32>* [[REG176:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG177:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG167]], align 16
-// CHECK-NEXT: [[REG178:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG177]] to <4 x i32>
-// CHECK-NEXT: [[REG179:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG168]], align 16
-// CHECK-NEXT: [[REG180:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG179]] to <4 x i32>
-// CHECK-NEXT: [[REG181:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef [[REG178]], <4 x i32> noundef [[REG180]], <16 x i8> noundef <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>)
-// CHECK-NEXT: store <4 x i32> [[REG181]], <4 x i32>* [[REG182:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG183:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG176]], align 16
-// CHECK-NEXT: [[REG184:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG182]], align 16
-// CHECK-NEXT: [[REG185:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_add(int vector[4], int vector[4])(<4 x i32> noundef [[REG183]], <4 x i32> noundef [[REG184]])
-// CHECK-NEXT: [[REG186:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG185]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG186]]
-
-// CHECK: define available_externally i64 @_mm_hadd_pi16(i64 noundef [[REG187:[0-9a-zA-Z_%.]+]], i64 noundef [[REG188:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG187]], i64* [[REG189:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG188]], i64* [[REG190:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG191:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG189]], align 8
-// CHECK-NEXT: [[REG192:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG191]], i32 0
-// CHECK-NEXT: [[REG193:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG190]], align 8
-// CHECK-NEXT: [[REG194:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG192]], i64 [[REG193]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG194]], <2 x i64>* [[REG195:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG196:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG195]], align 16
-// CHECK-NEXT: [[REG197:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG196]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG197]], <8 x i16>* [[REG198:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>, <16 x i8>* [[REG199:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>, <16 x i8>* [[REG200:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG201:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG198]], align 16
-// CHECK-NEXT: [[REG202:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG198]], align 16
-// CHECK-NEXT: [[REG203:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG201]], <8 x i16> noundef [[REG202]], <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>)
-// CHECK-NEXT: store <8 x i16> [[REG203]], <8 x i16>* [[REG204:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG205:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG198]], align 16
-// CHECK-NEXT: [[REG206:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG198]], align 16
-// CHECK-NEXT: [[REG207:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG205]], <8 x i16> noundef [[REG206]], <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>)
-// CHECK-NEXT: store <8 x i16> [[REG207]], <8 x i16>* [[REG198]], align 16
-// CHECK-NEXT: [[REG208:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG198]], align 16
-// CHECK-NEXT: [[REG209:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG204]], align 16
-// CHECK-NEXT: [[REG210:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_add(short vector[8], short vector[8])(<8 x i16> noundef [[REG208]], <8 x i16> noundef [[REG209]])
-// CHECK-NEXT: store <8 x i16> [[REG210]], <8 x i16>* [[REG198]], align 16
-// CHECK-NEXT: [[REG211:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG198]], align 16
-// CHECK-NEXT: [[REG212:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG211]] to <2 x i64>
-// CHECK-NEXT: [[REG213:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG212]], i32 1
-// CHECK-NEXT: ret i64 [[REG213]]
-
-// CHECK: define available_externally i64 @_mm_hadd_pi32(i64 noundef [[REG214:[0-9a-zA-Z_%.]+]], i64 noundef [[REG215:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG214]], i64* [[REG216:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG215]], i64* [[REG217:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG218:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG216]], align 8
-// CHECK-NEXT: [[REG219:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG218]], i32 0
-// CHECK-NEXT: [[REG220:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG217]], align 8
-// CHECK-NEXT: [[REG221:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG219]], i64 [[REG220]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG221]], <2 x i64>* [[REG222:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG223:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG222]], align 16
-// CHECK-NEXT: [[REG224:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG223]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG224]], <4 x i32>* [[REG225:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>, <16 x i8>* [[REG226:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15>, <16 x i8>* [[REG227:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG228:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG225]], align 16
-// CHECK-NEXT: [[REG229:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG225]], align 16
-// CHECK-NEXT: [[REG230:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef [[REG228]], <4 x i32> noundef [[REG229]], <16 x i8> noundef <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15>)
-// CHECK-NEXT: store <4 x i32> [[REG230]], <4 x i32>* [[REG231:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG232:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG225]], align 16
-// CHECK-NEXT: [[REG233:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG225]], align 16
-// CHECK-NEXT: [[REG234:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef [[REG232]], <4 x i32> noundef [[REG233]], <16 x i8> noundef <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>)
-// CHECK-NEXT: store <4 x i32> [[REG234]], <4 x i32>* [[REG225]], align 16
-// CHECK-NEXT: [[REG235:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG225]], align 16
-// CHECK-NEXT: [[REG236:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG231]], align 16
-// CHECK-NEXT: [[REG237:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_add(int vector[4], int vector[4])(<4 x i32> noundef [[REG235]], <4 x i32> noundef [[REG236]])
-// CHECK-NEXT: store <4 x i32> [[REG237]], <4 x i32>* [[REG225]], align 16
-// CHECK-NEXT: [[REG238:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG225]], align 16
-// CHECK-NEXT: [[REG239:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG238]] to <2 x i64>
-// CHECK-NEXT: [[REG240:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG239]], i32 1
-// CHECK-NEXT: ret i64 [[REG240]]
-
-// CHECK: define available_externally <2 x i64> @_mm_hadds_epi16(<2 x i64> noundef [[REG241:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG242:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG241]], <2 x i64>* [[REG243:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG242]], <2 x i64>* [[REG244:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[REG245:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[REG246:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG247:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG243]], align 16
-// CHECK-NEXT: [[REG248:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG247]] to <8 x i16>
-// CHECK-NEXT: [[REG249:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG245]], align 16
-// CHECK-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sum4s(short vector[8], int vector[4])(<8 x i16> noundef [[REG248]], <4 x i32> noundef [[REG249]])
-// CHECK-NEXT: store <4 x i32> [[REG250]], <4 x i32>* [[REG245]], align 16
-// CHECK-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG244]], align 16
-// CHECK-NEXT: [[REG252:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG251]] to <8 x i16>
-// CHECK-NEXT: [[REG253:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG246]], align 16
-// CHECK-NEXT: [[REG254:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sum4s(short vector[8], int vector[4])(<8 x i16> noundef [[REG252]], <4 x i32> noundef [[REG253]])
-// CHECK-NEXT: store <4 x i32> [[REG254]], <4 x i32>* [[REG246]], align 16
-// CHECK-NEXT: [[REG255:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG245]], align 16
-// CHECK-NEXT: [[REG256:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG246]], align 16
-// CHECK-NEXT: [[REG257:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_packs(int vector[4], int vector[4])(<4 x i32> noundef [[REG255]], <4 x i32> noundef [[REG256]])
-// CHECK-NEXT: [[REG258:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG257]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG258]], <4 x i32>* [[REG245]], align 16
-// CHECK-NEXT: [[REG259:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG245]], align 16
-// CHECK-NEXT: [[REG260:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG259]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG260]]
-
-// CHECK: define available_externally i64 @_mm_hadds_pi16(i64 noundef [[REG261:[0-9a-zA-Z_%.]+]], i64 noundef [[REG262:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG261]], i64* [[REG263:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG262]], i64* [[REG264:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[REG265:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG266:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG263]], align 8
-// CHECK-NEXT: [[REG267:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG266]], i32 0
-// CHECK-NEXT: [[REG268:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG264]], align 8
-// CHECK-NEXT: [[REG269:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG267]], i64 [[REG268]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG269]], <2 x i64>* [[REG270:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG271:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG270]], align 16
-// CHECK-NEXT: [[REG272:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG271]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG272]], <8 x i16>* [[REG273:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG274:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG273]], align 16
-// CHECK-NEXT: [[REG275:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sum4s(short vector[8], int vector[4])(<8 x i16> noundef [[REG274]], <4 x i32> noundef zeroinitializer)
-// CHECK-NEXT: store <4 x i32> [[REG275]], <4 x i32>* [[REG276:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG277:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG276]], align 16
-// CHECK-NEXT: [[REG278:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG276]], align 16
-// CHECK-NEXT: [[REG279:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_packs(int vector[4], int vector[4])(<4 x i32> noundef [[REG277]], <4 x i32> noundef [[REG278]])
-// CHECK-NEXT: store <8 x i16> [[REG279]], <8 x i16>* [[REG273]], align 16
-// CHECK-NEXT: [[REG280:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG273]], align 16
-// CHECK-NEXT: [[REG281:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG280]] to <2 x i64>
-// CHECK-NEXT: [[REG282:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG281]], i32 1
-// CHECK-NEXT: ret i64 [[REG282]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_hadd_epi16
+// CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
+// CHECK: call <8 x i16> @vec_add(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_hadd_epi32
+// CHECK: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>)
+// CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>)
+// CHECK: call <4 x i32> @vec_add(int vector[4], int vector[4])
+
+// CHECK-LABEL: define available_externally i64 @_mm_hadd_pi16
+// CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>)
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>)
+// CHECK: call <8 x i16> @vec_add(short vector[8], short vector[8])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 1
+
+// CHECK-LABEL: define available_externally i64 @_mm_hadd_pi32
+// CHECK: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15>)
+// CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>)
+// CHECK: call <4 x i32> @vec_add(int vector[4], int vector[4])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <4 x i32> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 1
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_hadds_epi16
+// CHECK: store <4 x i32> zeroinitializer, <4 x i32>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <4 x i32> zeroinitializer, <4 x i32>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <4 x i32> @vec_sum4s(short vector[8], int vector[4])
+// CHECK: call <4 x i32> @vec_sum4s(short vector[8], int vector[4])
+// CHECK: call <8 x i16> @vec_packs(int vector[4], int vector[4])
+
+// CHECK-LABEL: define available_externally i64 @_mm_hadds_pi16
+// CHECK: call <4 x i32> @vec_sum4s(short vector[8], int vector[4])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
+// CHECK: call <8 x i16> @vec_packs(int vector[4], int vector[4])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 1
 
 void __attribute__((noinline))
 test_hsub() {
@@ -422,164 +168,53 @@ test_hsub() {
 
 // CHECK-LABEL: @test_hsub
 
-// CHECK: define available_externally <2 x i64> @_mm_hsub_epi16(<2 x i64> noundef [[REG283:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG284:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG283]], <2 x i64>* [[REG285:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG284]], <2 x i64>* [[REG286:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, <16 x i8>* [[REG287:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, <16 x i8>* [[REG288:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG289:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG285]], align 16
-// CHECK-NEXT: [[REG290:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG289]] to <8 x i16>
-// CHECK-NEXT: [[REG291:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG286]], align 16
-// CHECK-NEXT: [[REG292:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG291]] to <8 x i16>
-// CHECK-NEXT: [[REG293:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG290]], <8 x i16> noundef [[REG292]], <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
-// CHECK-NEXT: store <8 x i16> [[REG293]], <8 x i16>* [[REG294:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG295:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG285]], align 16
-// CHECK-NEXT: [[REG296:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG295]] to <8 x i16>
-// CHECK-NEXT: [[REG297:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG286]], align 16
-// CHECK-NEXT: [[REG298:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG297]] to <8 x i16>
-// CHECK-NEXT: [[REG299:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG296]], <8 x i16> noundef [[REG298]], <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
-// CHECK-NEXT: store <8 x i16> [[REG299]], <8 x i16>* [[REG300:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG301:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG294]], align 16
-// CHECK-NEXT: [[REG302:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG300]], align 16
-// CHECK-NEXT: [[REG303:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sub(short vector[8], short vector[8])(<8 x i16> noundef [[REG301]], <8 x i16> noundef [[REG302]])
-// CHECK-NEXT: [[REG304:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG303]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG304]]
-
-// CHECK: define available_externally <2 x i64> @_mm_hsub_epi32(<2 x i64> noundef [[REG305:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG306:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG305]], <2 x i64>* [[REG307:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG306]], <2 x i64>* [[REG308:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>, <16 x i8>* [[REG309:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>, <16 x i8>* [[REG310:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG311:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG307]], align 16
-// CHECK-NEXT: [[REG312:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG311]] to <4 x i32>
-// CHECK-NEXT: [[REG313:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG308]], align 16
-// CHECK-NEXT: [[REG314:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG313]] to <4 x i32>
-// CHECK-NEXT: [[REG315:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef [[REG312]], <4 x i32> noundef [[REG314]], <16 x i8> noundef <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>)
-// CHECK-NEXT: store <4 x i32> [[REG315]], <4 x i32>* [[REG316:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG317:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG307]], align 16
-// CHECK-NEXT: [[REG318:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG317]] to <4 x i32>
-// CHECK-NEXT: [[REG319:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG308]], align 16
-// CHECK-NEXT: [[REG320:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG319]] to <4 x i32>
-// CHECK-NEXT: [[REG321:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef [[REG318]], <4 x i32> noundef [[REG320]], <16 x i8> noundef <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>)
-// CHECK-NEXT: store <4 x i32> [[REG321]], <4 x i32>* [[REG322:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG323:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG316]], align 16
-// CHECK-NEXT: [[REG324:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG322]], align 16
-// CHECK-NEXT: [[REG325:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sub(int vector[4], int vector[4])(<4 x i32> noundef [[REG323]], <4 x i32> noundef [[REG324]])
-// CHECK-NEXT: [[REG326:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG325]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG326]]
-
-// CHECK: define available_externally i64 @_mm_hsub_pi16(i64 noundef [[REG327:[0-9a-zA-Z_%.]+]], i64 noundef [[REG328:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG327]], i64* [[REG329:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG328]], i64* [[REG330:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>, <16 x i8>* [[REG331:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>, <16 x i8>* [[REG332:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG333:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG329]], align 8
-// CHECK-NEXT: [[REG334:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG333]], i32 0
-// CHECK-NEXT: [[REG335:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG330]], align 8
-// CHECK-NEXT: [[REG336:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG334]], i64 [[REG335]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG336]], <2 x i64>* [[REG337:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG338:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG337]], align 16
-// CHECK-NEXT: [[REG339:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG338]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG339]], <8 x i16>* [[REG340:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG341:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG340]], align 16
-// CHECK-NEXT: [[REG342:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG340]], align 16
-// CHECK-NEXT: [[REG343:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG341]], <8 x i16> noundef [[REG342]], <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>)
-// CHECK-NEXT: store <8 x i16> [[REG343]], <8 x i16>* [[REG344:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG345:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG340]], align 16
-// CHECK-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG340]], align 16
-// CHECK-NEXT: [[REG347:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG345]], <8 x i16> noundef [[REG346]], <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>)
-// CHECK-NEXT: store <8 x i16> [[REG347]], <8 x i16>* [[REG340]], align 16
-// CHECK-NEXT: [[REG348:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG340]], align 16
-// CHECK-NEXT: [[REG349:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG344]], align 16
-// CHECK-NEXT: [[REG350:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sub(short vector[8], short vector[8])(<8 x i16> noundef [[REG348]], <8 x i16> noundef [[REG349]])
-// CHECK-NEXT: store <8 x i16> [[REG350]], <8 x i16>* [[REG340]], align 16
-// CHECK-NEXT: [[REG351:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG340]], align 16
-// CHECK-NEXT: [[REG352:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG351]] to <2 x i64>
-// CHECK-NEXT: [[REG353:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG352]], i32 1
-// CHECK-NEXT: ret i64 [[REG353]]
-
-// CHECK: define available_externally i64 @_mm_hsub_pi32(i64 noundef [[REG354:[0-9a-zA-Z_%.]+]], i64 noundef [[REG355:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG354]], i64* [[REG356:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG355]], i64* [[REG357:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>, <16 x i8>* [[REG358:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15>, <16 x i8>* [[REG359:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG360:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG356]], align 8
-// CHECK-NEXT: [[REG361:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG360]], i32 0
-// CHECK-NEXT: [[REG362:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG357]], align 8
-// CHECK-NEXT: [[REG363:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG361]], i64 [[REG362]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG363]], <2 x i64>* [[REG364:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG365:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG364]], align 16
-// CHECK-NEXT: [[REG366:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG365]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG366]], <4 x i32>* [[REG367:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG368:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG367]], align 16
-// CHECK-NEXT: [[REG369:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG367]], align 16
-// CHECK-NEXT: [[REG370:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef [[REG368]], <4 x i32> noundef [[REG369]], <16 x i8> noundef <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15>)
-// CHECK-NEXT: store <4 x i32> [[REG370]], <4 x i32>* [[REG371:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG372:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG367]], align 16
-// CHECK-NEXT: [[REG373:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG367]], align 16
-// CHECK-NEXT: [[REG374:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef [[REG372]], <4 x i32> noundef [[REG373]], <16 x i8> noundef <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>)
-// CHECK-NEXT: store <4 x i32> [[REG374]], <4 x i32>* [[REG367]], align 16
-// CHECK-NEXT: [[REG375:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG367]], align 16
-// CHECK-NEXT: [[REG376:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG371]], align 16
-// CHECK-NEXT: [[REG377:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sub(int vector[4], int vector[4])(<4 x i32> noundef [[REG375]], <4 x i32> noundef [[REG376]])
-// CHECK-NEXT: store <4 x i32> [[REG377]], <4 x i32>* [[REG367]], align 16
-// CHECK-NEXT: [[REG378:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG367]], align 16
-// CHECK-NEXT: [[REG379:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG378]] to <2 x i64>
-// CHECK-NEXT: [[REG380:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG379]], i32 1
-// CHECK-NEXT: ret i64 [[REG380]]
-
-// CHECK: define available_externally <2 x i64> @_mm_hsubs_epi16(<2 x i64> noundef [[REG381:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG382:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG381]], <2 x i64>* [[REG383:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG382]], <2 x i64>* [[REG384:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, <16 x i8>* [[REG385:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, <16 x i8>* [[REG386:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG387:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG383]], align 16
-// CHECK-NEXT: [[REG388:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG387]] to <8 x i16>
-// CHECK-NEXT: [[REG389:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG384]], align 16
-// CHECK-NEXT: [[REG390:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG389]] to <8 x i16>
-// CHECK-NEXT: [[REG391:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG388]], <8 x i16> noundef [[REG390]], <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
-// CHECK-NEXT: store <8 x i16> [[REG391]], <8 x i16>* [[REG392:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG393:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG383]], align 16
-// CHECK-NEXT: [[REG394:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG393]] to <8 x i16>
-// CHECK-NEXT: [[REG395:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG384]], align 16
-// CHECK-NEXT: [[REG396:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG395]] to <8 x i16>
-// CHECK-NEXT: [[REG397:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG394]], <8 x i16> noundef [[REG396]], <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
-// CHECK-NEXT: store <8 x i16> [[REG397]], <8 x i16>* [[REG398:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG399:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG392]], align 16
-// CHECK-NEXT: [[REG400:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG398]], align 16
-// CHECK-NEXT: [[REG401:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_subs(short vector[8], short vector[8])(<8 x i16> noundef [[REG399]], <8 x i16> noundef [[REG400]])
-// CHECK-NEXT: [[REG402:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG401]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG402]]
-
-// CHECK: define available_externally i64 @_mm_hsubs_pi16(i64 noundef [[REG403:[0-9a-zA-Z_%.]+]], i64 noundef [[REG404:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG403]], i64* [[REG405:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG404]], i64* [[REG406:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>, <16 x i8>* [[REG407:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>, <16 x i8>* [[REG408:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG409:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG405]], align 8
-// CHECK-NEXT: [[REG410:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG409]], i32 0
-// CHECK-NEXT: [[REG411:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG406]], align 8
-// CHECK-NEXT: [[REG412:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG410]], i64 [[REG411]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG412]], <2 x i64>* [[REG413:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG414:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG413]], align 16
-// CHECK-NEXT: [[REG415:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG414]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG415]], <8 x i16>* [[REG416:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG417:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG416]], align 16
-// CHECK-NEXT: [[REG418:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG416]], align 16
-// CHECK-NEXT: [[REG419:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG417]], <8 x i16> noundef [[REG418]], <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>)
-// CHECK-NEXT: store <8 x i16> [[REG419]], <8 x i16>* [[REG420:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG421:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG416]], align 16
-// CHECK-NEXT: [[REG422:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG416]], align 16
-// CHECK-NEXT: [[REG423:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG421]], <8 x i16> noundef [[REG422]], <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>)
-// CHECK-NEXT: store <8 x i16> [[REG423]], <8 x i16>* [[REG424:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG425:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG420]], align 16
-// CHECK-NEXT: [[REG426:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG424]], align 16
-// CHECK-NEXT: [[REG427:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_subs(short vector[8], short vector[8])(<8 x i16> noundef [[REG425]], <8 x i16> noundef [[REG426]])
-// CHECK-NEXT: store <8 x i16> [[REG427]], <8 x i16>* [[REG416]], align 16
-// CHECK-NEXT: [[REG428:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG416]], align 16
-// CHECK-NEXT: [[REG429:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG428]] to <2 x i64>
-// CHECK-NEXT: [[REG430:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG429]], i32 1
-// CHECK-NEXT: ret i64 [[REG430]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_hsub_epi16
+// CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
+// CHECK: call <8 x i16> @vec_sub(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_hsub_epi32
+// CHECK: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>)
+// CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>)
+// CHECK: call <4 x i32> @vec_sub(int vector[4], int vector[4])
+
+// CHECK-LABEL: define available_externally i64 @_mm_hsub_pi16
+// CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>)
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>)
+// CHECK: call <8 x i16> @vec_sub(short vector[8], short vector[8])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 1
+
+// CHECK-LABEL: define available_externally i64 @_mm_hsub_pi32
+// CHECK: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15>)
+// CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>)
+// CHECK: call <4 x i32> @vec_sub(int vector[4], int vector[4])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <4 x i32> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 1
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_hsubs_epi16
+// CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
+// CHECK: call <8 x i16> @vec_subs(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally i64 @_mm_hsubs_pi16
+// CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>)
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>)
+// CHECK: call <8 x i16> @vec_subs(short vector[8], short vector[8])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 1
 
 void __attribute__((noinline))
 test_shuffle() {
@@ -589,64 +224,18 @@ test_shuffle() {
 
 // CHECK-LABEL: @test_shuffle
 
-// CHECK: define available_externally <2 x i64> @_mm_shuffle_epi8(<2 x i64> noundef [[REG431:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG432:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG431]], <2 x i64>* [[REG433:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG432]], <2 x i64>* [[REG434:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[REG435:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG436:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG434]], align 16
-// CHECK-NEXT: [[REG437:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG436]] to <16 x i8>
-// CHECK-NEXT: [[REG438:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])(<16 x i8> noundef [[REG437]], <16 x i8> noundef zeroinitializer)
-// CHECK-NEXT: store <16 x i8> [[REG438]], <16 x i8>* [[REG439:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG440:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG433]], align 16
-// CHECK-NEXT: [[REG441:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG440]] to <16 x i8>
-// CHECK-NEXT: [[REG442:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG433]], align 16
-// CHECK-NEXT: [[REG443:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG442]] to <16 x i8>
-// CHECK-NEXT: [[REG444:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG434]], align 16
-// CHECK-NEXT: [[REG445:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG444]] to <16 x i8>
-// CHECK-NEXT: [[REG446:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_perm(signed char vector[16], signed char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG441]], <16 x i8> noundef [[REG443]], <16 x i8> noundef [[REG445]])
-// CHECK-NEXT: store <16 x i8> [[REG446]], <16 x i8>* [[REG447:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG448:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG447]], align 16
-// CHECK-NEXT: [[REG449:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG439]], align 16
-// CHECK-NEXT: [[REG450:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(signed char vector[16], signed char vector[16], bool vector[16])(<16 x i8> noundef [[REG448]], <16 x i8> noundef zeroinitializer, <16 x i8> noundef [[REG449]])
-// CHECK-NEXT: [[REG451:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG450]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG451]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_shuffle_epi8
+// CHECK: store <16 x i8> zeroinitializer, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer)
+// CHECK: call <16 x i8> @vec_perm(signed char vector[16], signed char vector[16], unsigned char vector[16])
+// CHECK: call <16 x i8> @vec_sel(signed char vector[16], signed char vector[16], bool vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}})
 
-// CHECK: define available_externally i64 @_mm_shuffle_pi8(i64 noundef [[REG452:[0-9a-zA-Z_%.]+]], i64 noundef [[REG453:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG452]], i64* [[REG454:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG453]], i64* [[REG455:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[REG456:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG457:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG454]], align 8
-// CHECK-NEXT: [[REG458:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG457]], i32 0
-// CHECK-NEXT: [[REG459:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG454]], align 8
-// CHECK-NEXT: [[REG460:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG458]], i64 [[REG459]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG460]], <2 x i64>* [[REG461:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG462:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG461]], align 16
-// CHECK-NEXT: [[REG463:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG462]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG463]], <16 x i8>* [[REG464:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG465:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG455]], align 8
-// CHECK-NEXT: [[REG466:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG465]], i32 0
-// CHECK-NEXT: [[REG467:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG455]], align 8
-// CHECK-NEXT: [[REG468:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG466]], i64 [[REG467]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG468]], <2 x i64>* [[REG469:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG470:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG469]], align 16
-// CHECK-NEXT: [[REG471:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG470]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG471]], <16 x i8>* [[REG472:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG473:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG472]], align 16
-// CHECK-NEXT: [[REG474:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])(<16 x i8> noundef [[REG473]], <16 x i8> noundef zeroinitializer)
-// CHECK-NEXT: store <16 x i8> [[REG474]], <16 x i8>* [[REG475:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG476:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG464]], align 16
-// CHECK-NEXT: [[REG477:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG464]], align 16
-// CHECK-NEXT: [[REG478:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG472]], align 16
-// CHECK-NEXT: [[REG479:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_perm(signed char vector[16], signed char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG476]], <16 x i8> noundef [[REG477]], <16 x i8> noundef [[REG478]])
-// CHECK-NEXT: store <16 x i8> [[REG479]], <16 x i8>* [[REG464]], align 16
-// CHECK-NEXT: [[REG480:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG464]], align 16
-// CHECK-NEXT: [[REG481:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG475]], align 16
-// CHECK-NEXT: [[REG482:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(signed char vector[16], signed char vector[16], bool vector[16])(<16 x i8> noundef [[REG480]], <16 x i8> noundef zeroinitializer, <16 x i8> noundef [[REG481]])
-// CHECK-NEXT: store <16 x i8> [[REG482]], <16 x i8>* [[REG464]], align 16
-// CHECK-NEXT: [[REG483:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG464]], align 16
-// CHECK-NEXT: [[REG484:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG483]] to <2 x i64>
-// CHECK-NEXT: [[REG485:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG484]], i32 0
-// CHECK-NEXT: ret i64 [[REG485]]
+// CHECK-LABEL: define available_externally i64 @_mm_shuffle_pi8
+// CHECK: call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer)
+// CHECK: call <16 x i8> @vec_perm(signed char vector[16], signed char vector[16], unsigned char vector[16])
+// CHECK: call <16 x i8> @vec_sel(signed char vector[16], signed char vector[16], bool vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0
 
 void __attribute__((noinline))
 test_sign() {
@@ -660,173 +249,45 @@ test_sign() {
 
 // CHECK-LABEL: @test_sign
 
-// CHECK: define available_externally <2 x i64> @_mm_sign_epi8(<2 x i64> noundef [[REG486:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG487:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG486]], <2 x i64>* [[REG488:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG487]], <2 x i64>* [[REG489:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[REG490:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG491:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG489]], align 16
-// CHECK-NEXT: [[REG492:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG491]] to <16 x i8>
-// CHECK-NEXT: [[REG493:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])(<16 x i8> noundef [[REG492]], <16 x i8> noundef zeroinitializer)
-// CHECK-NEXT: store <16 x i8> [[REG493]], <16 x i8>* [[REG494:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG495:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG489]], align 16
-// CHECK-NEXT: [[REG496:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG495]] to <16 x i8>
-// CHECK-NEXT: [[REG497:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16])(<16 x i8> noundef [[REG496]], <16 x i8> noundef zeroinitializer)
-// CHECK-NEXT: [[REG498:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_neg(signed char vector[16])(<16 x i8> noundef [[REG497]])
-// CHECK-NEXT: store <16 x i8> [[REG498]], <16 x i8>* [[REG499:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG500:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG494]], align 16
-// CHECK-NEXT: [[REG501:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG499]], align 16
-// CHECK-NEXT: [[REG502:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_add(signed char vector[16], signed char vector[16])(<16 x i8> noundef [[REG500]], <16 x i8> noundef [[REG501]])
-// CHECK-NEXT: store <16 x i8> [[REG502]], <16 x i8>* [[REG503:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG504:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG488]], align 16
-// CHECK-NEXT: [[REG505:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG504]] to <16 x i8>
-// CHECK-NEXT: [[REG506:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG503]], align 16
-// CHECK-NEXT: [[REG507:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mul(signed char vector[16], signed char vector[16])(<16 x i8> noundef [[REG505]], <16 x i8> noundef [[REG506]])
-// CHECK-NEXT: [[REG508:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG507]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG508]]
-
-// CHECK: define available_externally <2 x i64> @_mm_sign_epi16(<2 x i64> noundef [[REG509:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG510:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG509]], <2 x i64>* [[REG511:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG510]], <2 x i64>* [[REG512:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <8 x i16> zeroinitializer, <8 x i16>* [[REG513:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG514:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG512]], align 16
-// CHECK-NEXT: [[REG515:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG514]] to <8 x i16>
-// CHECK-NEXT: [[REG516:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmplt(short vector[8], short vector[8])(<8 x i16> noundef [[REG515]], <8 x i16> noundef zeroinitializer)
-// CHECK-NEXT: store <8 x i16> [[REG516]], <8 x i16>* [[REG517:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG518:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG512]], align 16
-// CHECK-NEXT: [[REG519:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG518]] to <8 x i16>
-// CHECK-NEXT: [[REG520:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])(<8 x i16> noundef [[REG519]], <8 x i16> noundef zeroinitializer)
-// CHECK-NEXT: [[REG521:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_neg(short vector[8])(<8 x i16> noundef [[REG520]])
-// CHECK-NEXT: store <8 x i16> [[REG521]], <8 x i16>* [[REG522:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG523:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG517]], align 16
-// CHECK-NEXT: [[REG524:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG522]], align 16
-// CHECK-NEXT: [[REG525:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_add(short vector[8], short vector[8])(<8 x i16> noundef [[REG523]], <8 x i16> noundef [[REG524]])
-// CHECK-NEXT: store <8 x i16> [[REG525]], <8 x i16>* [[REG526:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG527:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG511]], align 16
-// CHECK-NEXT: [[REG528:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG527]] to <8 x i16>
-// CHECK-NEXT: [[REG529:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG526]], align 16
-// CHECK-NEXT: [[REG530:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mul(short vector[8], short vector[8])(<8 x i16> noundef [[REG528]], <8 x i16> noundef [[REG529]])
-// CHECK-NEXT: [[REG531:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG530]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG531]]
-
-// CHECK: define available_externally <2 x i64> @_mm_sign_epi32(<2 x i64> noundef [[REG532:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG533:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG532]], <2 x i64>* [[REG534:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG533]], <2 x i64>* [[REG535:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[REG536:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG537:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG535]], align 16
-// CHECK-NEXT: [[REG538:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG537]] to <4 x i32>
-// CHECK-NEXT: [[REG539:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(int vector[4], int vector[4])(<4 x i32> noundef [[REG538]], <4 x i32> noundef zeroinitializer)
-// CHECK-NEXT: store <4 x i32> [[REG539]], <4 x i32>* [[REG540:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG541:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG535]], align 16
-// CHECK-NEXT: [[REG542:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG541]] to <4 x i32>
-// CHECK-NEXT: [[REG543:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(int vector[4], int vector[4])(<4 x i32> noundef [[REG542]], <4 x i32> noundef zeroinitializer)
-// CHECK-NEXT: [[REG544:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_neg(int vector[4])(<4 x i32> noundef [[REG543]])
-// CHECK-NEXT: store <4 x i32> [[REG544]], <4 x i32>* [[REG545:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG546:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG540]], align 16
-// CHECK-NEXT: [[REG547:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG545]], align 16
-// CHECK-NEXT: [[REG548:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_add(int vector[4], int vector[4])(<4 x i32> noundef [[REG546]], <4 x i32> noundef [[REG547]])
-// CHECK-NEXT: store <4 x i32> [[REG548]], <4 x i32>* [[REG549:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG550:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG534]], align 16
-// CHECK-NEXT: [[REG551:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG550]] to <4 x i32>
-// CHECK-NEXT: [[REG552:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG549]], align 16
-// CHECK-NEXT: [[REG553:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mul(int vector[4], int vector[4])(<4 x i32> noundef [[REG551]], <4 x i32> noundef [[REG552]])
-// CHECK-NEXT: [[REG554:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG553]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG554]]
-
-// CHECK: define available_externally i64 @_mm_sign_pi8(i64 noundef [[REG555:[0-9a-zA-Z_%.]+]], i64 noundef [[REG556:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG555]], i64* [[REG557:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG556]], i64* [[REG558:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* [[REG559:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG560:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG557]], align 8
-// CHECK-NEXT: [[REG561:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG560]], i32 0
-// CHECK-NEXT: [[REG562:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG557]], align 8
-// CHECK-NEXT: [[REG563:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG561]], i64 [[REG562]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG563]], <2 x i64>* [[REG564:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG565:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG564]], align 16
-// CHECK-NEXT: [[REG566:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG565]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG566]], <16 x i8>* [[REG567:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG568:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG558]], align 8
-// CHECK-NEXT: [[REG569:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG568]], i32 0
-// CHECK-NEXT: [[REG570:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG558]], align 8
-// CHECK-NEXT: [[REG571:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG569]], i64 [[REG570]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG571]], <2 x i64>* [[REG572:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG573:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG572]], align 16
-// CHECK-NEXT: [[REG574:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG573]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG574]], <16 x i8>* [[REG575:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG576:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG567]], align 16
-// CHECK-NEXT: [[REG577:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG576]] to <2 x i64>
-// CHECK-NEXT: [[REG578:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG575]], align 16
-// CHECK-NEXT: [[REG579:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG578]] to <2 x i64>
-// CHECK-NEXT: [[REG580:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_sign_epi8(<2 x i64> noundef [[REG577]], <2 x i64> noundef [[REG579]])
-// CHECK-NEXT: [[REG581:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG580]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG581]], <16 x i8>* [[REG567]], align 16
-// CHECK-NEXT: [[REG582:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG567]], align 16
-// CHECK-NEXT: [[REG583:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG582]] to <2 x i64>
-// CHECK-NEXT: [[REG584:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG583]], i32 0
-// CHECK-NEXT: ret i64 [[REG584]]
-
-// CHECK: define available_externally i64 @_mm_sign_pi16(i64 noundef [[REG585:[0-9a-zA-Z_%.]+]], i64 noundef [[REG586:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG585]], i64* [[REG587:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG586]], i64* [[REG588:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <8 x i16> zeroinitializer, <8 x i16>* [[REG589:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG590:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG587]], align 8
-// CHECK-NEXT: [[REG591:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG590]], i32 0
-// CHECK-NEXT: [[REG592:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG587]], align 8
-// CHECK-NEXT: [[REG593:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG591]], i64 [[REG592]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG593]], <2 x i64>* [[REG594:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG595:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG594]], align 16
-// CHECK-NEXT: [[REG596:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG595]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG596]], <8 x i16>* [[REG597:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG598:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG588]], align 8
-// CHECK-NEXT: [[REG599:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG598]], i32 0
-// CHECK-NEXT: [[REG600:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG588]], align 8
-// CHECK-NEXT: [[REG601:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG599]], i64 [[REG600]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG601]], <2 x i64>* [[REG602:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG603:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG602]], align 16
-// CHECK-NEXT: [[REG604:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG603]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG604]], <8 x i16>* [[REG605:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG606:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG597]], align 16
-// CHECK-NEXT: [[REG607:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG606]] to <2 x i64>
-// CHECK-NEXT: [[REG608:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG605]], align 16
-// CHECK-NEXT: [[REG609:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG608]] to <2 x i64>
-// CHECK-NEXT: [[REG610:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_sign_epi16(<2 x i64> noundef [[REG607]], <2 x i64> noundef [[REG609]])
-// CHECK-NEXT: [[REG611:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG610]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG611]], <8 x i16>* [[REG597]], align 16
-// CHECK-NEXT: [[REG612:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG597]], align 16
-// CHECK-NEXT: [[REG613:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG612]] to <2 x i64>
-// CHECK-NEXT: [[REG614:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG613]], i32 0
-// CHECK-NEXT: ret i64 [[REG614]]
-
-// CHECK: define available_externally i64 @_mm_sign_pi32(i64 noundef [[REG615:[0-9a-zA-Z_%.]+]], i64 noundef [[REG616:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG615]], i64* [[REG617:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG616]], i64* [[REG618:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[REG619:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG620:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG617]], align 8
-// CHECK-NEXT: [[REG621:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG620]], i32 0
-// CHECK-NEXT: [[REG622:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG617]], align 8
-// CHECK-NEXT: [[REG623:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG621]], i64 [[REG622]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG623]], <2 x i64>* [[REG624:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG625:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG624]], align 16
-// CHECK-NEXT: [[REG626:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG625]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG626]], <4 x i32>* [[REG627:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG628:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG618]], align 8
-// CHECK-NEXT: [[REG629:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG628]], i32 0
-// CHECK-NEXT: [[REG630:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG618]], align 8
-// CHECK-NEXT: [[REG631:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG629]], i64 [[REG630]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG631]], <2 x i64>* [[REG632:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG633:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG632]], align 16
-// CHECK-NEXT: [[REG634:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG633]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG634]], <4 x i32>* [[REG635:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG636:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG627]], align 16
-// CHECK-NEXT: [[REG637:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG636]] to <2 x i64>
-// CHECK-NEXT: [[REG638:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG635]], align 16
-// CHECK-NEXT: [[REG639:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG638]] to <2 x i64>
-// CHECK-NEXT: [[REG640:[0-9a-zA-Z_%.]+]] = call <2 x i64> @_mm_sign_epi32(<2 x i64> noundef [[REG637]], <2 x i64> noundef [[REG639]])
-// CHECK-NEXT: [[REG641:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG640]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG641]], <4 x i32>* [[REG627]], align 16
-// CHECK-NEXT: [[REG642:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG627]], align 16
-// CHECK-NEXT: [[REG643:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG642]] to <2 x i64>
-// CHECK-NEXT: [[REG644:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG643]], i32 0
-// CHECK-NEXT: ret i64 [[REG644]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_sign_epi8
+// CHECK: call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer)
+// CHECK: call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer)
+// CHECK: call <16 x i8> @vec_neg(signed char vector[16])
+// CHECK: call <16 x i8> @vec_add(signed char vector[16], signed char vector[16])
+// CHECK: call <16 x i8> @vec_mul(signed char vector[16], signed char vector[16])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_sign_epi16
+// CHECK: call <8 x i16> @vec_cmplt(short vector[8], short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef zeroinitializer)
+// CHECK: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef zeroinitializer)
+// CHECK: call <8 x i16> @vec_neg(short vector[8])
+// CHECK: call <8 x i16> @vec_add(short vector[8], short vector[8])
+// CHECK: call <8 x i16> @vec_mul(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_sign_epi32
+// CHECK: store <4 x i32> zeroinitializer, <4 x i32>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <4 x i32> @vec_cmplt(int vector[4], int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
+// CHECK: call <4 x i32> @vec_cmpgt(int vector[4], int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
+// CHECK: call <4 x i32> @vec_neg(int vector[4])
+// CHECK: call <4 x i32> @vec_add(int vector[4], int vector[4])
+// CHECK: call <4 x i32> @vec_mul(int vector[4], int vector[4])
+
+// CHECK-LABEL: define available_externally i64 @_mm_sign_pi8
+// CHECK: store <16 x i8> zeroinitializer, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <2 x i64> @_mm_sign_epi8
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0
+
+// CHECK-LABEL: define available_externally i64 @_mm_sign_pi16
+// CHECK: store <8 x i16> zeroinitializer, <8 x i16>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <2 x i64> @_mm_sign_epi16
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0
+
+// CHECK-LABEL: define available_externally i64 @_mm_sign_pi32
+// CHECK: store <4 x i32> zeroinitializer, <4 x i32>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <2 x i64> @_mm_sign_epi32
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <4 x i32> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0
 
 void __attribute__((noinline))
 test_maddubs() {
@@ -836,110 +297,35 @@ test_maddubs() {
 
 // CHECK-LABEL: @test_maddubs
 
-// CHECK: define available_externally <2 x i64> @_mm_maddubs_epi16(<2 x i64> noundef [[REG645:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG646:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG645]], <2 x i64>* [[REG647:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG646]], <2 x i64>* [[REG648:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG649:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(short)(i16 noundef signext 255)
-// CHECK-NEXT: store <8 x i16> [[REG649]], <8 x i16>* [[REG650:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG651:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG647]], align 16
-// CHECK-NEXT: [[REG652:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG651]] to <16 x i8>
-// CHECK-NEXT: [[REG653:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_unpackh(signed char vector[16])(<16 x i8> noundef [[REG652]])
-// CHECK-NEXT: [[REG654:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG650]], align 16
-// CHECK-NEXT: [[REG655:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_and(short vector[8], short vector[8])(<8 x i16> noundef [[REG653]], <8 x i16> noundef [[REG654]])
-// CHECK-NEXT: store <8 x i16> [[REG655]], <8 x i16>* [[REG656:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG657:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG647]], align 16
-// CHECK-NEXT: [[REG658:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG657]] to <16 x i8>
-// CHECK-NEXT: [[REG659:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_unpackl(signed char vector[16])(<16 x i8> noundef [[REG658]])
-// CHECK-NEXT: [[REG660:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG650]], align 16
-// CHECK-NEXT: [[REG661:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_and(short vector[8], short vector[8])(<8 x i16> noundef [[REG659]], <8 x i16> noundef [[REG660]])
-// CHECK-NEXT: store <8 x i16> [[REG661]], <8 x i16>* [[REG662:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG663:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG648]], align 16
-// CHECK-NEXT: [[REG664:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG663]] to <16 x i8>
-// CHECK-NEXT: [[REG76:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_unpackh(signed char vector[16])(<16 x i8> noundef [[REG664]])
-// CHECK-NEXT: store <8 x i16> [[REG76]], <8 x i16>* [[REG665:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG666:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG648]], align 16
-// CHECK-NEXT: [[REG667:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG666]] to <16 x i8>
-// CHECK-NEXT: [[REG668:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_unpackl(signed char vector[16])(<16 x i8> noundef [[REG667]])
-// CHECK-NEXT: store <8 x i16> [[REG668]], <8 x i16>* [[REG669:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG670:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG656]], align 16
-// CHECK-NEXT: [[REG671:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG665]], align 16
-// CHECK-NEXT: [[REG672:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mul(short vector[8], short vector[8])(<8 x i16> noundef [[REG670]], <8 x i16> noundef [[REG671]])
-// CHECK-NEXT: store <8 x i16> [[REG672]], <8 x i16>* [[REG656]], align 16
-// CHECK-NEXT: [[REG673:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG662]], align 16
-// CHECK-NEXT: [[REG674:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG669]], align 16
-// CHECK-NEXT: [[REG675:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mul(short vector[8], short vector[8])(<8 x i16> noundef [[REG673]], <8 x i16> noundef [[REG674]])
-// CHECK-NEXT: store <8 x i16> [[REG675]], <8 x i16>* [[REG662]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, <16 x i8>* [[REG676:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, <16 x i8>* [[REG677:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG678:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG656]], align 16
-// CHECK-NEXT: [[REG679:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG662]], align 16
-// CHECK-NEXT: [[REG680:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG678]], <8 x i16> noundef [[REG679]], <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
-// CHECK-NEXT: store <8 x i16> [[REG680]], <8 x i16>* [[REG665]], align 16
-// CHECK-NEXT: [[REG681:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG656]], align 16
-// CHECK-NEXT: [[REG682:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG662]], align 16
-// CHECK-NEXT: [[REG683:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG681]], <8 x i16> noundef [[REG682]], <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
-// CHECK-NEXT: store <8 x i16> [[REG683]], <8 x i16>* [[REG669]], align 16
-// CHECK-NEXT: [[REG684:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG665]], align 16
-// CHECK-NEXT: [[REG685:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG669]], align 16
-// CHECK-NEXT: [[REG686:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_adds(short vector[8], short vector[8])(<8 x i16> noundef [[REG684]], <8 x i16> noundef [[REG685]])
-// CHECK-NEXT: [[REG687:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG686]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG687]]
-
-// CHECK: define available_externally i64 @_mm_maddubs_pi16(i64 noundef [[REG688:[0-9a-zA-Z_%.]+]], i64 noundef [[REG689:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG688]], i64* [[REG690:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG689]], i64* [[REG691:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG692:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG690]], align 8
-// CHECK-NEXT: [[REG75:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG692]], i32 0
-// CHECK-NEXT: [[REG693:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG690]], align 8
-// CHECK-NEXT: [[REG694:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG75]], i64 [[REG693]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG694]], <2 x i64>* [[REG80:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG695:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG80]], align 16
-// CHECK-NEXT: [[REG84:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG695]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG84]], <8 x i16>* [[REG696:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG697:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG696]], align 16
-// CHECK-NEXT: [[REG698:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG697]] to <16 x i8>
-// CHECK-NEXT: [[REG699:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_unpackl(signed char vector[16])(<16 x i8> noundef [[REG698]])
-// CHECK-NEXT: store <8 x i16> [[REG699]], <8 x i16>* [[REG696]], align 16
-// CHECK-NEXT: [[REG700:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(short)(i16 noundef signext 255)
-// CHECK-NEXT: store <8 x i16> [[REG700]], <8 x i16>* [[REG701:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG702:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG696]], align 16
-// CHECK-NEXT: [[REG703:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG701]], align 16
-// CHECK-NEXT: [[REG704:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_and(short vector[8], short vector[8])(<8 x i16> noundef [[REG702]], <8 x i16> noundef [[REG703]])
-// CHECK-NEXT: store <8 x i16> [[REG704]], <8 x i16>* [[REG696]], align 16
-// CHECK-NEXT: [[REG705:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG691]], align 8
-// CHECK-NEXT: [[REG706:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG705]], i32 0
-// CHECK-NEXT: [[REG707:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG691]], align 8
-// CHECK-NEXT: [[REG708:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG706]], i64 [[REG707]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG708]], <2 x i64>* [[REG709:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG710:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG709]], align 16
-// CHECK-NEXT: [[REG711:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG710]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG711]], <8 x i16>* [[REG712:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG713:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG712]], align 16
-// CHECK-NEXT: [[REG714:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG713]] to <16 x i8>
-// CHECK-NEXT: [[REG715:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_unpackl(signed char vector[16])(<16 x i8> noundef [[REG714]])
-// CHECK-NEXT: store <8 x i16> [[REG715]], <8 x i16>* [[REG712]], align 16
-// CHECK-NEXT: [[REG716:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG696]], align 16
-// CHECK-NEXT: [[REG717:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG712]], align 16
-// CHECK-NEXT: [[REG718:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mul(short vector[8], short vector[8])(<8 x i16> noundef [[REG716]], <8 x i16> noundef [[REG717]])
-// CHECK-NEXT: store <8 x i16> [[REG718]], <8 x i16>* [[REG712]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, <16 x i8>* [[REG719:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, <16 x i8>* [[REG720:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG721:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG712]], align 16
-// CHECK-NEXT: [[REG722:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG712]], align 16
-// CHECK-NEXT: [[REG723:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG721]], <8 x i16> noundef [[REG722]], <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
-// CHECK-NEXT: store <8 x i16> [[REG723]], <8 x i16>* [[REG696]], align 16
-// CHECK-NEXT: [[REG724:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG712]], align 16
-// CHECK-NEXT: [[REG725:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG712]], align 16
-// CHECK-NEXT: [[REG726:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef [[REG724]], <8 x i16> noundef [[REG725]], <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
-// CHECK-NEXT: store <8 x i16> [[REG726]], <8 x i16>* [[REG712]], align 16
-// CHECK-NEXT: [[REG727:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG696]], align 16
-// CHECK-NEXT: [[REG728:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG712]], align 16
-// CHECK-NEXT: [[REG729:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_adds(short vector[8], short vector[8])(<8 x i16> noundef [[REG727]], <8 x i16> noundef [[REG728]])
-// CHECK-NEXT: store <8 x i16> [[REG729]], <8 x i16>* [[REG696]], align 16
-// CHECK-NEXT: [[REG730:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG696]], align 16
-// CHECK-NEXT: [[REG731:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG730]] to <2 x i64>
-// CHECK-NEXT: [[REG732:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG731]], i32 0
-// CHECK-NEXT: ret i64 [[REG732]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_maddubs_epi16
+// CHECK: call <8 x i16> @vec_splats(short)(i16 noundef signext 255)
+// CHECK: call <8 x i16> @vec_unpackh(signed char vector[16])
+// CHECK: call <8 x i16> @vec_and(short vector[8], short vector[8])
+// CHECK: call <8 x i16> @vec_unpackl(signed char vector[16])
+// CHECK: call <8 x i16> @vec_and(short vector[8], short vector[8])
+// CHECK: call <8 x i16> @vec_unpackh(signed char vector[16])
+// CHECK: call <8 x i16> @vec_unpackl(signed char vector[16])
+// CHECK: call <8 x i16> @vec_mul(short vector[8], short vector[8])
+// CHECK: call <8 x i16> @vec_mul(short vector[8], short vector[8])
+// CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
+// CHECK: call <8 x i16> @vec_adds(short vector[8], short vector[8])
+
+// CHECK-LABEL: define available_externally i64 @_mm_maddubs_pi16
+// CHECK: call <8 x i16> @vec_unpackl(signed char vector[16])
+// CHECK: call <8 x i16> @vec_splats(short)(i16 noundef signext 255)
+// CHECK: call <8 x i16> @vec_and(short vector[8], short vector[8])
+// CHECK: call <8 x i16> @vec_unpackl(signed char vector[16])
+// CHECK: call <8 x i16> @vec_mul(short vector[8], short vector[8])
+// CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
+// CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
+// CHECK: call <8 x i16> @vec_adds(short vector[8], short vector[8])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0
 
 void __attribute__((noinline))
 test_mulhrs() {
@@ -949,119 +335,33 @@ test_mulhrs() {
 
 // CHECK-LABEL: @test_mulhrs
 
-// CHECK: define available_externally <2 x i64> @_mm_mulhrs_epi16(<2 x i64> noundef [[REG733:[0-9a-zA-Z_%.]+]], <2 x i64> noundef [[REG734:[0-9a-zA-Z_%.]+]])
-// CHECK: store <2 x i64> [[REG733]], <2 x i64>* [[REG735:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <2 x i64> [[REG734]], <2 x i64>* [[REG736:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG737:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG735]], align 16
-// CHECK-NEXT: [[REG738:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG737]] to <8 x i16>
-// CHECK-NEXT: [[REG739:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_unpackh(short vector[8])(<8 x i16> noundef [[REG738]])
-// CHECK-NEXT: store <4 x i32> [[REG739]], <4 x i32>* [[REG740:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG741:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG736]], align 16
-// CHECK-NEXT: [[REG742:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG741]] to <8 x i16>
-// CHECK-NEXT: [[REG743:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_unpackh(short vector[8])(<8 x i16> noundef [[REG742]])
-// CHECK-NEXT: store <4 x i32> [[REG743]], <4 x i32>* [[REG744:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG745:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG740]], align 16
-// CHECK-NEXT: [[REG746:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG744]], align 16
-// CHECK-NEXT: [[REG747:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mul(int vector[4], int vector[4])(<4 x i32> noundef [[REG745]], <4 x i32> noundef [[REG746]])
-// CHECK-NEXT: store <4 x i32> [[REG747]], <4 x i32>* [[REG740]], align 16
-// CHECK-NEXT: [[REG748:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG735]], align 16
-// CHECK-NEXT: [[REG749:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG748]] to <8 x i16>
-// CHECK-NEXT: [[REG750:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_unpackl(short vector[8])(<8 x i16> noundef [[REG749]])
-// CHECK-NEXT: store <4 x i32> [[REG750]], <4 x i32>* [[REG744]], align 16
-// CHECK-NEXT: [[REG751:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG736]], align 16
-// CHECK-NEXT: [[REG752:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG751]] to <8 x i16>
-// CHECK-NEXT: [[REG753:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_unpackl(short vector[8])(<8 x i16> noundef [[REG752]])
-// CHECK-NEXT: store <4 x i32> [[REG753]], <4 x i32>* [[REG754:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG755:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG744]], align 16
-// CHECK-NEXT: [[REG756:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG754]], align 16
-// CHECK-NEXT: [[REG757:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mul(int vector[4], int vector[4])(<4 x i32> noundef [[REG755]], <4 x i32> noundef [[REG756]])
-// CHECK-NEXT: store <4 x i32> [[REG757]], <4 x i32>* [[REG744]], align 16
-// CHECK-NEXT: [[REG758:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext 14)
-// CHECK-NEXT: store <4 x i32> [[REG758]], <4 x i32>* [[REG759:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG760:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG740]], align 16
-// CHECK-NEXT: [[REG761:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG759]], align 16
-// CHECK-NEXT: [[REG762:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG760]], <4 x i32> noundef [[REG761]])
-// CHECK-NEXT: store <4 x i32> [[REG762]], <4 x i32>* [[REG740]], align 16
-// CHECK-NEXT: [[REG763:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG744]], align 16
-// CHECK-NEXT: [[REG764:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG759]], align 16
-// CHECK-NEXT: [[REG765:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG763]], <4 x i32> noundef [[REG764]])
-// CHECK-NEXT: store <4 x i32> [[REG765]], <4 x i32>* [[REG744]], align 16
-// CHECK-NEXT: [[REG766:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splats(int)(i32 noundef signext 1)
-// CHECK-NEXT: store <4 x i32> [[REG766]], <4 x i32>* [[REG767:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG768:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG740]], align 16
-// CHECK-NEXT: [[REG769:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG767]], align 16
-// CHECK-NEXT: [[REG770:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_add(int vector[4], int vector[4])(<4 x i32> noundef [[REG768]], <4 x i32> noundef [[REG769]])
-// CHECK-NEXT: store <4 x i32> [[REG770]], <4 x i32>* [[REG740]], align 16
-// CHECK-NEXT: [[REG771:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG740]], align 16
-// CHECK-NEXT: [[REG772:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG767]], align 16
-// CHECK-NEXT: [[REG773:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG771]], <4 x i32> noundef [[REG772]])
-// CHECK-NEXT: store <4 x i32> [[REG773]], <4 x i32>* [[REG740]], align 16
-// CHECK-NEXT: [[REG774:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG744]], align 16
-// CHECK-NEXT: [[REG775:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG767]], align 16
-// CHECK-NEXT: [[REG776:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_add(int vector[4], int vector[4])(<4 x i32> noundef [[REG774]], <4 x i32> noundef [[REG775]])
-// CHECK-NEXT: store <4 x i32> [[REG776]], <4 x i32>* [[REG744]], align 16
-// CHECK-NEXT: [[REG777:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG744]], align 16
-// CHECK-NEXT: [[REG778:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG767]], align 16
-// CHECK-NEXT: [[REG779:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG777]], <4 x i32> noundef [[REG778]])
-// CHECK-NEXT: store <4 x i32> [[REG779]], <4 x i32>* [[REG744]], align 16
-// CHECK-NEXT: [[REG780:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG740]], align 16
-// CHECK-NEXT: [[REG781:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG744]], align 16
-// CHECK-NEXT: [[REG782:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> noundef [[REG780]], <4 x i32> noundef [[REG781]])
-// CHECK-NEXT: [[REG783:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG782]] to <2 x i64>
-// CHECK-NEXT: ret <2 x i64> [[REG783]]
-
-// CHECK: define available_externally i64 @_mm_mulhrs_pi16(i64 noundef [[REG784:[0-9a-zA-Z_%.]+]], i64 noundef [[REG785:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG784]], i64* [[REG786:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG785]], i64* [[REG787:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG788:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG786]], align 8
-// CHECK-NEXT: [[REG789:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG788]], i32 0
-// CHECK-NEXT: [[REG790:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG786]], align 8
-// CHECK-NEXT: [[REG791:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG789]], i64 [[REG790]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG791]], <2 x i64>* [[REG792:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG793:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG792]], align 16
-// CHECK-NEXT: [[REG794:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG793]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG794]], <4 x i32>* [[REG795:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG796:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG795]], align 16
-// CHECK-NEXT: [[REG797:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG796]] to <8 x i16>
-// CHECK-NEXT: [[REG798:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_unpackh(short vector[8])(<8 x i16> noundef [[REG797]])
-// CHECK-NEXT: store <4 x i32> [[REG798]], <4 x i32>* [[REG795]], align 16
-// CHECK-NEXT: [[REG799:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG787]], align 8
-// CHECK-NEXT: [[REG800:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG799]], i32 0
-// CHECK-NEXT: [[REG801:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG787]], align 8
-// CHECK-NEXT: [[REG802:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG800]], i64 [[REG801]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG802]], <2 x i64>* [[REG803:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG804:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG803]], align 16
-// CHECK-NEXT: [[REG805:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG804]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG805]], <4 x i32>* [[REG806:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG807:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG806]], align 16
-// CHECK-NEXT: [[REG808:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG807]] to <8 x i16>
-// CHECK-NEXT: [[REG809:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_unpackh(short vector[8])(<8 x i16> noundef [[REG808]])
-// CHECK-NEXT: store <4 x i32> [[REG809]], <4 x i32>* [[REG806]], align 16
-// CHECK-NEXT: [[REG810:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG795]], align 16
-// CHECK-NEXT: [[REG811:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG806]], align 16
-// CHECK-NEXT: [[REG812:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_mul(int vector[4], int vector[4])(<4 x i32> noundef [[REG810]], <4 x i32> noundef [[REG811]])
-// CHECK-NEXT: store <4 x i32> [[REG812]], <4 x i32>* [[REG795]], align 16
-// CHECK-NEXT: [[REG813:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext 14)
-// CHECK-NEXT: store <4 x i32> [[REG813]], <4 x i32>* [[REG814:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG815:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG795]], align 16
-// CHECK-NEXT: [[REG816:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG814]], align 16
-// CHECK-NEXT: [[REG817:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG815]], <4 x i32> noundef [[REG816]])
-// CHECK-NEXT: store <4 x i32> [[REG817]], <4 x i32>* [[REG795]], align 16
-// CHECK-NEXT: [[REG818:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_splats(int)(i32 noundef signext 1)
-// CHECK-NEXT: store <4 x i32> [[REG818]], <4 x i32>* [[REG819:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG820:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG795]], align 16
-// CHECK-NEXT: [[REG821:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG819]], align 16
-// CHECK-NEXT: [[REG822:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_add(int vector[4], int vector[4])(<4 x i32> noundef [[REG820]], <4 x i32> noundef [[REG821]])
-// CHECK-NEXT: store <4 x i32> [[REG822]], <4 x i32>* [[REG795]], align 16
-// CHECK-NEXT: [[REG823:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG795]], align 16
-// CHECK-NEXT: [[REG824:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG819]], align 16
-// CHECK-NEXT: [[REG825:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG823]], <4 x i32> noundef [[REG824]])
-// CHECK-NEXT: store <4 x i32> [[REG825]], <4 x i32>* [[REG795]], align 16
-// CHECK-NEXT: [[REG826:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG795]], align 16
-// CHECK-NEXT: [[REG827:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG806]], align 16
-// CHECK-NEXT: [[REG828:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> noundef [[REG826]], <4 x i32> noundef [[REG827]])
-// CHECK-NEXT: store <8 x i16> [[REG828]], <8 x i16>* [[REG829:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG830:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG829]], align 16
-// CHECK-NEXT: [[REG831:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG830]] to <2 x i64>
-// CHECK-NEXT: [[REG832:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG831]], i32 0
-// CHECK-NEXT: ret i64 [[REG832]]
+// CHECK-LABEL: define available_externally <2 x i64> @_mm_mulhrs_epi16
+// CHECK: call <4 x i32> @vec_unpackh(short vector[8])
+// CHECK: call <4 x i32> @vec_unpackh(short vector[8])
+// CHECK: call <4 x i32> @vec_mul(int vector[4], int vector[4])
+// CHECK: call <4 x i32> @vec_unpackl(short vector[8])
+// CHECK: call <4 x i32> @vec_unpackl(short vector[8])
+// CHECK: call <4 x i32> @vec_mul(int vector[4], int vector[4])
+// CHECK: call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext 14)
+// CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
+// CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
+// CHECK: call <4 x i32> @vec_splats(int)(i32 noundef signext 1)
+// CHECK: call <4 x i32> @vec_add(int vector[4], int vector[4])
+// CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
+// CHECK: call <4 x i32> @vec_add(int vector[4], int vector[4])
+// CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
+// CHECK: %[[PACK:[0-9a-zA-Z_.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])
+// CHECK: bitcast <8 x i16> %[[PACK]] to <2 x i64>
+
+// CHECK-LABEL: define available_externally i64 @_mm_mulhrs_pi16
+// CHECK: call <4 x i32> @vec_unpackh(short vector[8])
+// CHECK: call <4 x i32> @vec_unpackh(short vector[8])
+// CHECK: call <4 x i32> @vec_mul(int vector[4], int vector[4])
+// CHECK: call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext 14)
+// CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
+// CHECK: call <4 x i32> @vec_splats(int)(i32 noundef signext 1)
+// CHECK: call <4 x i32> @vec_add(int vector[4], int vector[4])
+// CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
+// CHECK: call <8 x i16> @vec_pack(int vector[4], int vector[4])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0

diff  --git a/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
index a77b6017752a2..1a331087efd9e 100644
--- a/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: powerpc-registered-target
 
 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
@@ -41,31 +40,14 @@ test_add() {
 
 // CHECK-LABEL: @test_add
 
-// CHECK: define available_externally <4 x float> @_mm_add_ps(<4 x float> noundef [[REG1:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG2:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG1]], <4 x float>* [[REG3:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG2]], <4 x float>* [[REG4:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG5:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG3]], align 16
-// CHECK-NEXT: [[REG6:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG4]], align 16
-// CHECK-NEXT: [[REG7:[0-9a-zA-Z_%.]+]] = fadd <4 x float> [[REG5]], [[REG6]]
-// CHECK-NEXT: ret <4 x float> [[REG7]]
-
-// CHECK: define available_externally <4 x float> @_mm_add_ss(<4 x float> noundef [[REG8:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG9:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG8]], <4 x float>* [[REG10:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG9]], <4 x float>* [[REG11:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG12:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG10]], align 16
-// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef [[REG12]], i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG13]], <4 x float>* [[REG14:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG11]], align 16
-// CHECK-NEXT: [[REG16:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef [[REG15]], i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG16]], <4 x float>* [[REG17:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG14]], align 16
-// CHECK-NEXT: [[REG19:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG17]], align 16
-// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = fadd <4 x float> [[REG18]], [[REG19]]
-// CHECK-NEXT: store <4 x float> [[REG20]], <4 x float>* [[REG21:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG10]], align 16
-// CHECK-NEXT: [[REG23:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG21]], align 16
-// CHECK-NEXT: [[REG24:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef [[REG22]], <4 x float> noundef [[REG23]], <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG24]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_add_ps
+// CHECK: fadd <4 x float>
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_add_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: fadd <4 x float>
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
 
 void __attribute__((noinline))
 test_avg() {
@@ -75,45 +57,18 @@ test_avg() {
 
 // CHECK-LABEL: @test_avg
 
-// CHECK: define available_externally i64 @_mm_avg_pu16
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG25]])
-// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG26]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG27]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG29:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG28]])
-// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG29]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG30]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG31:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG32:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG31]], <8 x i16> noundef [[REG32]])
-// CHECK-NEXT: store <8 x i16> [[REG33]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG35:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG34]] to <2 x i64>
-// CHECK-NEXT: [[REG36:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG35]], i32 0
-// CHECK-NEXT: ret i64 [[REG36]]
-
-// CHECK: define available_externally i64 @_mm_avg_pu8
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG38:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG37]])
-// CHECK-NEXT: [[REG39:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG38]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG39]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG40:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG41:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG40]])
-// CHECK-NEXT: [[REG42:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG41]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG42]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG43:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG44:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG45:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG43]], <16 x i8> noundef [[REG44]])
-// CHECK-NEXT: store <16 x i8> [[REG45]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG46:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG47:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG46]] to <2 x i64>
-// CHECK-NEXT: [[REG48:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG47]], i32 0
-// CHECK-NEXT: ret i64 [[REG48]]
+// CHECK-LABEL: define available_externally i64 @_mm_avg_pu16
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])
+// CHECK: bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally i64 @_mm_avg_pu8
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])
+// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
 
 void __attribute__((noinline))
 test_alt_name_avg() {
@@ -123,13 +78,11 @@ test_alt_name_avg() {
 
 // CHECK-LABEL: @test_alt_name_avg
 
-// CHECK: define available_externally i64 @_m_pavgw
-// CHECK: [[REG49:[0-9a-zA-Z_%.]+]] = call i64 @_mm_avg_pu16
-// CHECK-NEXT: ret i64 [[REG49]]
+// CHECK-LABEL: define available_externally i64 @_m_pavgw
+// CHECK: call i64 @_mm_avg_pu16
 
-// CHECK: define available_externally i64 @_m_pavgb
-// CHECK: [[REG50:[0-9a-zA-Z_%.]+]] = call i64 @_mm_avg_pu8
-// CHECK-NEXT: ret i64 [[REG50]]
+// CHECK-LABEL: define available_externally i64 @_m_pavgb
+// CHECK: call i64 @_mm_avg_pu8
 
 void __attribute__((noinline))
 test_cmp() {
@@ -161,257 +114,127 @@ test_cmp() {
 
 // CHECK-LABEL: @test_cmp
 
-// CHECK: define available_externally <4 x float> @_mm_cmpeq_ps
-// CHECK: [[REG51:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG52:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG51]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG52]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpeq_ss
-// CHECK: [[REG53:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG53]], <4 x float>* [[REG54:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG55:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG55]], <4 x float>* [[REG56:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG57:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG54]], align 16
-// CHECK-NEXT: [[REG58:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG56]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])(<4 x float> noundef [[REG57]], <4 x float> noundef [[REG58]])
-// CHECK: [[REG59:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG59]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpge_ps
-// CHECK: [[REG60:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG61:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG60]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG61]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpge_ss
-// CHECK: [[REG62:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG62]], <4 x float>* [[REG63:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG64:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG64]], <4 x float>* [[REG65:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG66:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG63]], align 16
-// CHECK-NEXT: [[REG67:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG65]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])(<4 x float> noundef [[REG66]], <4 x float> noundef [[REG67]])
-// CHECK: [[REG68:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG68]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpgt_ps
-// CHECK: [[REG69:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG69]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG70]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpgt_ss
-// CHECK: [[REG71:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG71]], <4 x float>* [[REG72:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG73:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG73]], <4 x float>* [[REG74:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG75:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG72]], align 16
-// CHECK-NEXT: [[REG76:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG74]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> noundef [[REG75]], <4 x float> noundef [[REG76]])
-// CHECK: [[REG77:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG77]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmple_ps
-// CHECK: [[REG78:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmple(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG78]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG79]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmple_ss
-// CHECK: [[REG80:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG80]], <4 x float>* [[REG81:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG82:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG82]], <4 x float>* [[REG83:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG84:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG81]], align 16
-// CHECK-NEXT: [[REG85:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG83]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmple(float vector[4], float vector[4])(<4 x float> noundef [[REG84]], <4 x float> noundef [[REG85]])
-// CHECK: [[REG86:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG86]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmplt_ps
-// CHECK: [[REG87:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG88:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG87]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG88]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpeq_ps
+// CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpeq_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpge_ps
+// CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpge_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpgt_ps
+// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpgt_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmple_ps
+// CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmple_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmplt_ps
+// CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
 
 // CHECK: @_mm_cmplt_ss
-// CHECK: [[REG89:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG89]], <4 x float>* [[REG90:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG91:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG91]], <4 x float>* [[REG92:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG93:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG90]], align 16
-// CHECK-NEXT: [[REG94:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG92]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])(<4 x float> noundef [[REG93]], <4 x float> noundef [[REG94]])
-// CHECK: [[REG95:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG95]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpneq_ps
-// CHECK: [[REG96:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG97:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG96]] to <4 x float>
-// CHECK-NEXT: store <4 x float> [[REG97]], <4 x float>* [[REG98:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG99:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG98]], align 16
-// CHECK-NEXT: [[REG100:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG98]], align 16
-// CHECK-NEXT: [[REG101:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_nor(float vector[4], float vector[4])(<4 x float> noundef [[REG99]], <4 x float> noundef [[REG100]])
-// CHECK-NEXT: ret <4 x float> [[REG101]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpneq_ss
-// CHECK: [[REG102:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG102]], <4 x float>* [[REG103:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG104:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG104]], <4 x float>* [[REG105:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG106:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG103]], align 16
-// CHECK-NEXT: [[REG107:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG105]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])(<4 x float> noundef [[REG106]], <4 x float> noundef [[REG107]])
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpneq_ps
+// CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
 // CHECK: call <4 x float> @vec_nor(float vector[4], float vector[4])
-// CHECK: [[REG108:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG108]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpnge_ps
-// CHECK: [[REG109:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG110:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG109]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG110]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpnge_ss
-// CHECK: [[REG111:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG111]], <4 x float>* [[REG112:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG113:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG113]], <4 x float>* [[REG114:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG115:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG112]], align 16
-// CHECK-NEXT: [[REG116:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG114]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])(<4 x float> noundef [[REG115]], <4 x float> noundef [[REG116]])
-// CHECK: [[REG117:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG117]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpngt_ps
-// CHECK: [[REG118:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmple(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG119:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG118]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG119]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpngt_ss
-// CHECK: [[REG120:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG120]], <4 x float>* [[REG121:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG122:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG122]], <4 x float>* [[REG123:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG124:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG121]], align 16
-// CHECK-NEXT: [[REG125:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG123]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmple(float vector[4], float vector[4])(<4 x float> noundef [[REG124]], <4 x float> noundef [[REG125]])
-// CHECK: [[REG126:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG126]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpnle_ps
-// CHECK: [[REG127:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG128:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG127]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG128]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpnle_ss
-// CHECK: [[REG129:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG129]], <4 x float>* [[REG130:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG131:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG131]], <4 x float>* [[REG132:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG133:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG130]], align 16
-// CHECK-NEXT: [[REG134:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG132]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> noundef [[REG133]], <4 x float> noundef [[REG134]])
-// CHECK: [[REG135:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG135]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpnlt_ps
-// CHECK: [[REG136:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG137:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG136]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG137]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpnlt_ss
-// CHECK: [[REG138:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG138]], <4 x float>* [[REG139:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG140:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG140]], <4 x float>* [[REG141:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG142:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG139]], align 16
-// CHECK-NEXT: [[REG143:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG141]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])(<4 x float> noundef [[REG142]], <4 x float> noundef [[REG143]])
-// CHECK: [[REG144:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG144]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpord_ps
-// CHECK: [[REG145:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG146:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG145]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG146]], <4 x i32>* [[REG147:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG148:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG149:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG148]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG149]], <4 x i32>* [[REG150:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG151:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG147]], align 16
-// CHECK-NEXT: [[REG152:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef [[REG151]])
-// CHECK-NEXT: store <4 x i32> [[REG152]], <4 x i32>* [[REG153:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG154:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG150]], align 16
-// CHECK-NEXT: [[REG155:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef [[REG154]])
-// CHECK-NEXT: store <4 x i32> [[REG155]], <4 x i32>* [[REG156:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG157:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG153]], align 16
-// CHECK-NEXT: [[REG158:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG156]], align 16
-// CHECK-NEXT: [[REG159:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG160:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG159]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG160]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpord_ss
-// CHECK: [[REG161:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])
-// CHECK-NEXT: [[REG162:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG161]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG162]], <4 x i32>* [[REG163:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG164:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])
-// CHECK-NEXT: [[REG165:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG164]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG165]], <4 x i32>* [[REG166:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG167:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG163]], align 16
-// CHECK-NEXT: [[REG168:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef [[REG167]])
-// CHECK-NEXT: store <4 x i32> [[REG168]], <4 x i32>* [[REG161:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG169:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG166]], align 16
-// CHECK-NEXT: [[REG170:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef [[REG169]])
-// CHECK-NEXT: store <4 x i32> [[REG170]], <4 x i32>* [[REG171:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG172:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG161]], align 16
-// CHECK-NEXT: [[REG173:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG171]], align 16
-// CHECK-NEXT: [[REG174:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG172]], <4 x i32> noundef [[REG173]])
-// CHECK-NEXT: store <4 x i32> [[REG174]], <4 x i32>* [[REG161]], align 16
-// CHECK: [[REG175:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG161]], align 16
-// CHECK-NEXT: [[REG176:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG175]] to <4 x float>
-// CHECK-NEXT: [[REG177:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x float> noundef [[REG176]], <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG177]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpunord_ps
-// CHECK: [[REG178:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG179:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef [[REG178]])
-// CHECK-NEXT: [[REG180:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG179]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG180]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG181:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG182:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef [[REG181]])
-// CHECK-NEXT: [[REG183:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG182]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG183]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG184:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG185:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG184]], <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
-// CHECK-NEXT: store <4 x i32> [[REG185]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG186:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG187:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG186]], <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
-// CHECK-NEXT: store <4 x i32> [[REG187]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG188:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG189:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG190:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG188]], <4 x i32> noundef [[REG189]])
-// CHECK-NEXT: [[REG191:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG190]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG191]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpunord_ss
-// CHECK: [[REG192:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG193:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef [[REG192]])
-// CHECK-NEXT: [[REG194:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG193]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG194]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG195:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG196:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef [[REG195]])
-// CHECK-NEXT: [[REG197:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG196]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG197]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG198:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG199:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG198]], <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
-// CHECK-NEXT: store <4 x i32> [[REG199]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG200:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG201:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG200]], <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
-// CHECK-NEXT: store <4 x i32> [[REG201]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG202:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG203:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG204:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef [[REG202]], <4 x i32> noundef [[REG203]])
-// CHECK-NEXT: store <4 x i32> [[REG204]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG205:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG206:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG207:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG206]] to <4 x float>
-// CHECK-NEXT: [[REG208:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef [[REG205]], <4 x float> noundef [[REG207]], <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG208]]
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpneq_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_nor(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnge_ps
+// CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnge_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpngt_ps
+// CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpngt_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnle_ps
+// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnle_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnlt_ps
+// CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnlt_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ps
+// CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ss
+// CHECK: call <4 x float> @vec_abs(float vector[4])
+// CHECK: call <4 x float> @vec_abs(float vector[4])
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ps
+// CHECK: call <4 x float> @vec_abs(float vector[4])
+// CHECK: call <4 x float> @vec_abs(float vector[4])
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ss
+// CHECK: call <4 x float> @vec_abs(float vector[4])
+// CHECK: call <4 x float> @vec_abs(float vector[4])
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
 
 void __attribute__((noinline))
 test_comi() {
@@ -425,71 +248,41 @@ test_comi() {
 
 // CHECK-LABEL: @test_comi
 
-// CHECK: define available_externally signext i32 @_mm_comieq_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG209:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG210:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG209]], i32 0
-// CHECK-NEXT: [[REG211:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG212:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG211]], i32 0
-// CHECK-NEXT: [[REG213:[0-9a-zA-Z_%.]+]] = fcmp oeq float [[REG210]], [[REG212]]
-// CHECK-NEXT: [[REG214:[0-9a-zA-Z_%.]+]] = zext i1 [[REG213]] to i32
-// CHECK-NEXT: ret i32 [[REG214]]
-
-// CHECK: define available_externally signext i32 @_mm_comige_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG215:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG216:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG215]], i32 0
-// CHECK-NEXT: [[REG217:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG218:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG217]], i32 0
-// CHECK-NEXT: [[REG219:[0-9a-zA-Z_%.]+]] = fcmp oge float [[REG216]], [[REG218]]
-// CHECK-NEXT: [[REG220:[0-9a-zA-Z_%.]+]] = zext i1 [[REG219]] to i32
-// CHECK-NEXT: ret i32 [[REG220]]
-
-// CHECK: define available_externally signext i32 @_mm_comigt_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG221:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG222:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG221]], i32 0
-// CHECK-NEXT: [[REG223:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG224:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG223]], i32 0
-// CHECK-NEXT: [[REG225:[0-9a-zA-Z_%.]+]] = fcmp ogt float [[REG222]], [[REG224]]
-// CHECK-NEXT: [[REG226:[0-9a-zA-Z_%.]+]] = zext i1 [[REG225]] to i32
-// CHECK-NEXT: ret i32 [[REG226]]
-
-// CHECK: define available_externally signext i32 @_mm_comile_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG227:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG228:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG227]], i32 0
-// CHECK-NEXT: [[REG229:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG230:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG229]], i32 0
-// CHECK-NEXT: [[REG231:[0-9a-zA-Z_%.]+]] = fcmp ole float [[REG228]], [[REG230]]
-// CHECK-NEXT: [[REG232:[0-9a-zA-Z_%.]+]] = zext i1 [[REG231]] to i32
-// CHECK-NEXT: ret i32 [[REG232]]
-
-// CHECK: define available_externally signext i32 @_mm_comilt_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG233:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG234:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG233]], i32 0
-// CHECK-NEXT: [[REG235:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG236:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG235]], i32 0
-// CHECK-NEXT: [[REG237:[0-9a-zA-Z_%.]+]] = fcmp olt float [[REG234]], [[REG236]]
-// CHECK-NEXT: [[REG238:[0-9a-zA-Z_%.]+]] = zext i1 [[REG237]] to i32
-// CHECK-NEXT: ret i32 [[REG238]]
-
-// CHECK: define available_externally signext i32 @_mm_comineq_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG239:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG240:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG239]], i32 0
-// CHECK-NEXT: [[REG241:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG242:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG241]], i32 0
-// CHECK-NEXT: [[REG243:[0-9a-zA-Z_%.]+]] = fcmp une float [[REG240]], [[REG242]]
-// CHECK-NEXT: [[REG244:[0-9a-zA-Z_%.]+]] = zext i1 [[REG243]] to i32
-// CHECK-NEXT: ret i32 [[REG244]]
+// CHECK-LABEL: define available_externally signext i32 @_mm_comieq_ss
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oeq float %[[VAL1]], %[[VAL2]]
+// CHECK: zext i1 %[[CMP]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_comige_ss
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oge float %[[VAL1]], %[[VAL2]]
+// CHECK: zext i1 %[[CMP]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_comigt_ss
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ogt float %[[VAL1]], %[[VAL2]]
+// CHECK: zext i1 %[[CMP]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_comile_ss
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ole float %[[VAL1]], %[[VAL2]]
+// CHECK: zext i1 %[[CMP]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_comilt_ss
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp olt float %[[VAL1]], %[[VAL2]]
+// CHECK: zext i1 %[[CMP]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_comineq_ss
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp une float %[[VAL1]], %[[VAL2]]
+// CHECK: zext i1 %[[CMP]] to i32
 
 void __attribute__((noinline))
 test_convert() {
@@ -520,330 +313,108 @@ test_convert() {
 
 // CHECK-LABEL: @test_convert
 
-// CHECK: define available_externally <4 x float> @_mm_cvt_pi2ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG245:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG246:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG247:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_cvtpi32_ps(<4 x float> noundef [[REG245]], i64 noundef [[REG246]])
-// CHECK-NEXT: ret <4 x float> [[REG247]]
-
-// CHECK: define available_externally i64 @_mm_cvt_ps2pi
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG248:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG249:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvtps_pi32(<4 x float> noundef [[REG248]])
-// CHECK-NEXT: ret i64 [[REG249]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvt_si2ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG252:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_cvtsi32_ss(<4 x float> noundef [[REG250]], i32 noundef signext [[REG251]])
-// CHECK-NEXT: ret <4 x float> [[REG252]]
-
-// CHECK: define available_externally signext i32 @_mm_cvt_ss2si
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG253:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG254:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvtss_si32(<4 x float> noundef [[REG253]])
-// CHECK-NEXT: ret i32 [[REG254]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtpi16_ps
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG255:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG256:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG255]], i32 0
-// CHECK-NEXT: [[REG257:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG258:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG256]], i64 [[REG257]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG258]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG259:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG260:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG259]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG260]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG261:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG262:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vupklsh(short vector[8])(<8 x i16> noundef [[REG261]])
-// CHECK-NEXT: store <4 x i32> [[REG262]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG263:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG264:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG263]], i32 0)
-// CHECK-NEXT: store <4 x float> [[REG264]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG265:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG265]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtpi32_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG266:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG267:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG266]], i32 0
-// CHECK-NEXT: [[REG268:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG269:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG267]], i64 [[REG268]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG269]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG270:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG271:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG270]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG271]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG272:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG273:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG272]], i32 0)
-// CHECK-NEXT: store <4 x float> [[REG273]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG274:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG275:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG274]] to <2 x i64>
-// CHECK-NEXT: [[REG276:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG275]], i32 0
-// CHECK-NEXT: [[REG277:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG276]], i32 0
-// CHECK-NEXT: [[REG278:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG279:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG278]] to <2 x i64>
-// CHECK-NEXT: [[REG280:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG279]], i32 1
-// CHECK-NEXT: [[REG281:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG277]], i64 [[REG280]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG281]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG282:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG283:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG282]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG283]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtpi32x2_ps
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG284:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG285:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG284]], i32 0
-// CHECK-NEXT: [[REG286:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG287:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG285]], i64 [[REG286]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG287]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG288:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG289:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG288]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG289]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG290:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG291:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG290]], i32 0)
-// CHECK-NEXT: store <4 x float> [[REG291]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG292:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG292]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtpi8_ps
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG293:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG294:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG293]], i32 0
-// CHECK-NEXT: [[REG295:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG296:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG294]], i64 [[REG295]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG296]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG297:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG298:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG297]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG298]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG299:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG300:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_vupkhsb(signed char vector[16])(<16 x i8> noundef [[REG299]])
-// CHECK-NEXT: store <8 x i16> [[REG300]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG301:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG302:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vupkhsh(short vector[8])(<8 x i16> noundef [[REG301]])
-// CHECK-NEXT: store <4 x i32> [[REG302]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG303:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG304:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG303]], i32 0)
-// CHECK-NEXT: store <4 x float> [[REG304]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG305:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG305]]
-
-// CHECK: define available_externally i64 @_mm_cvtps_pi16
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG306:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG307:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> noundef [[REG306]])
-// CHECK-NEXT: store <4 x float> [[REG307]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG308:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG309:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG308]], i32 0)
-// CHECK-NEXT: store <4 x i32> [[REG309]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG310:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG311:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG312:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> noundef [[REG310]], <4 x i32> noundef [[REG311]])
-// CHECK-NEXT: [[REG313:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG312]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG313]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG314:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG315:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG314]], i32 0
-// CHECK-NEXT: ret i64 [[REG315]]
-
-// CHECK: define available_externally i64 @_mm_cvtps_pi32
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG316:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG317:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG316]] to <2 x i64>
-// CHECK-NEXT: [[REG318:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> noundef [[REG317]], i32 noundef zeroext 0)
-// CHECK-NEXT: [[REG319:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG318]] to <4 x float>
-// CHECK-NEXT: store <4 x float> [[REG319]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG320:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG321:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> noundef [[REG320]])
-// CHECK-NEXT: store <4 x float> [[REG321]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG322:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG323:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG322]], i32 0)
-// CHECK-NEXT: [[REG324:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG323]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG324]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG325:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG326:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG325]], i32 0
-// CHECK-NEXT: ret i64 [[REG326]]
-
-// CHECK: define available_externally i64 @_mm_cvtps_pi8
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG327:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG328:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> noundef [[REG327]])
-// CHECK-NEXT: store <4 x float> [[REG328]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG329:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG330:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG329]], i32 0)
-// CHECK-NEXT: store <4 x i32> [[REG330]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG331:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG332:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> noundef [[REG331]], <4 x i32> noundef zeroinitializer)
-// CHECK-NEXT: store <8 x i16> [[REG332]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG333:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG334:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG335:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_pack(short vector[8], short vector[8])(<8 x i16> noundef [[REG333]], <8 x i16> noundef [[REG334]])
-// CHECK-NEXT: store <16 x i8> [[REG335]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG336:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG337:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG336]] to <2 x i64>
-// CHECK-NEXT: [[REG338:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG337]], i32 0
-// CHECK-NEXT: ret i64 [[REG338]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtpu16_ps
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <8 x i16> zeroinitializer, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG339:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG340:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG339]], i32 0
-// CHECK-NEXT: [[REG341:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG342:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG340]], i64 [[REG341]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG342]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG343:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG344:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG343]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG344]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG345:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-LE-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG345]], <8 x i16> noundef zeroinitializer)
-// CHECK-BE-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef zeroinitializer, <8 x i16> noundef [[REG345]])
-// CHECK-NEXT: [[REG347:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG346]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG347]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG348:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG349:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> [[REG348]], i32 0)
-// CHECK-NEXT: store <4 x float> [[REG349]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG350:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG350]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtpu8_ps
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG351:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG352:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG351]], i32 0
-// CHECK-NEXT: [[REG353:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG354:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG352]], i64 [[REG353]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG354]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG355:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG356:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG355]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG356]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG357:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-LE-NEXT: [[REG358:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG357]], <16 x i8> noundef zeroinitializer)
-// CHECK-BE-NEXT: [[REG358:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef zeroinitializer, <16 x i8> noundef [[REG357]])
-// CHECK-NEXT: [[REG359:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG358]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG359]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG360:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-LE-NEXT: [[REG361:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef [[REG360]], <8 x i16> noundef zeroinitializer)
-// CHECK-BE-NEXT: [[REG361:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef zeroinitializer, <8 x i16> noundef [[REG360]])
-// CHECK-NEXT: [[REG362:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG361]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG362]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG363:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG364:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> [[REG363]], i32 0)
-// CHECK-NEXT: store <4 x float> [[REG364]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG365:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG365]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtsi32_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG366:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG367:[0-9a-zA-Z_%.]+]] = sitofp i32 [[REG366]] to float
-// CHECK-NEXT: store float [[REG367]], float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG368:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG369:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG370:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG369]], float [[REG368]], i32 0
-// CHECK-NEXT: store <4 x float> [[REG370]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG371:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG371]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtsi64_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG372:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG373:[0-9a-zA-Z_%.]+]] = sitofp i64 [[REG372]] to float
-// CHECK-NEXT: store float [[REG373]], float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG374:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG375:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG376:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG375]], float [[REG374]], i32 0
-// CHECK-NEXT: store <4 x float> [[REG376]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG377:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG377]]
-
-// CHECK: define available_externally float @_mm_cvtss_f32
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG378:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG379:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG378]], i32 0
-// CHECK-NEXT: ret float [[REG379]]
-
-// CHECK: define available_externally signext i32 @_mm_cvtss_si32
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 0, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG380:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-LE-NEXT: [[REG381:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG380]])
-// CHECK-BE-NEXT: [[REG381:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG380]])
-// CHECK-NEXT: [[REG382:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 0
-// CHECK-NEXT: [[REG383:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 1
-// CHECK-NEXT: [[REG384:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 2
-// CHECK-NEXT: store <4 x float> [[REG382]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 [[REG383]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store double [[REG384]], double* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG385:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG386:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG385]] to i32
-// CHECK-NEXT: ret i32 [[REG386]]
-
-// CHECK: define available_externally i64 @_mm_cvtss_si64
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 0, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG387:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-LE-NEXT: [[REG388:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG387]])
-// CHECK-BE-NEXT: [[REG388:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG387]])
-// CHECK-NEXT: [[REG389:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 0
-// CHECK-NEXT: [[REG390:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 1
-// CHECK-NEXT: [[REG391:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 2
-// CHECK-NEXT: store <4 x float> [[REG389]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 [[REG390]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store double [[REG391]], double* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG392:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: ret i64 [[REG392]]
-
-// CHECK: define available_externally i64 @_mm_cvtt_ps2pi
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK: [[REG393:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG394:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvttps_pi32(<4 x float> noundef [[REG393]])
-// CHECK-NEXT: ret i64 [[REG394]]
-
-// CHECK: define available_externally signext i32 @_mm_cvtt_ss2si
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG395:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG396:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvttss_si32(<4 x float> noundef [[REG395]])
-// CHECK-NEXT: ret i32 [[REG396]]
-
-// CHECK: define available_externally i64 @_mm_cvttps_pi32
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG397:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG398:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG397]] to <2 x i64>
-// CHECK-NEXT: [[REG399:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> noundef [[REG398]], i32 noundef zeroext 0)
-// CHECK-NEXT: [[REG400:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG399]] to <4 x float>
-// CHECK-NEXT: store <4 x float> [[REG400]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG401:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG402:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG401]], i32 0)
-// CHECK-NEXT: [[REG403:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG402]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG403]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG404:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG405:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG404]], i32 0
-// CHECK-NEXT: ret i64 [[REG405]]
-
-// CHECK: define available_externally signext i32 @_mm_cvttss_si32
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG406:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG407:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG406]], i32 0
-// CHECK-NEXT: store float [[REG407]], float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG408:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG409:[0-9a-zA-Z_%.]+]] = fptosi float [[REG408]] to i32
-// CHECK-NEXT: ret i32 [[REG409]]
-
-// CHECK: define available_externally i64 @_mm_cvttss_si64
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG410:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG411:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG410]], i32 0
-// CHECK-NEXT: store float [[REG411]], float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG412:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG413:[0-9a-zA-Z_%.]+]] = fptosi float [[REG412]] to i64
-// CHECK-NEXT: ret i64 [[REG413]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cvt_pi2ps
+// CHECK: call <4 x float> @_mm_cvtpi32_ps
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvt_ps2pi
+// CHECK: call i64 @_mm_cvtps_pi32
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cvt_si2ss
+// CHECK: call <4 x float> @_mm_cvtsi32_ss
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_cvt_ss2si
+// CHECK: call signext i32 @_mm_cvtss_si32
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi16_ps
+// CHECK: call <4 x i32> @vec_vupklsh(short vector[8])
+// CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi32_ps
+// CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi32x2_ps
+// CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi8_ps
+// CHECK: call <8 x i16> @vec_vupkhsb(signed char vector[16])
+// CHECK: call <4 x i32> @vec_vupkhsh(short vector[8])
+// CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvtps_pi16
+// CHECK: call <4 x float> @vec_rint(float vector[4])
+// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
+// CHECK: call <8 x i16> @vec_pack(int vector[4], int vector[4])
+// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvtps_pi32
+// CHECK: call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: call <4 x float> @vec_rint(float vector[4])
+// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
+// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvtps_pi8
+// CHECK: call <4 x float> @vec_rint(float vector[4])
+// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
+// CHECK: call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
+// CHECK: call <16 x i8> @vec_pack(short vector[8], short vector[8])
+// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpu16_ps
+// CHECK-LE: call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef zeroinitializer)
+// CHECK-BE: call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef zeroinitializer, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpu8_ps
+// CHECK-BE: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK-BE: call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef zeroinitializer, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK-LE: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer)
+// CHECK-LE: call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef zeroinitializer)
+// CHECK: call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtsi32_ss
+// CHECK: sitofp i32 %{{[0-9a-zA-Z_.]+}} to float
+// CHECK: insertelement <4 x float> %{{[0-9a-zA-Z_.]+}}, float %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtsi64_ss
+// CHECK: sitofp i64 %{{[0-9a-zA-Z_.]+}} to float
+// CHECK: insertelement <4 x float> %{{[0-9a-zA-Z_.]+}}, float %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally float @_mm_cvtss_f32
+// CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_cvtss_si32
+// CHECK-LE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"
+// CHECK-BE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"
+// CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 0
+// CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 1
+// CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 2
+// CHECK: trunc i64 %{{[0-9a-zA-Z_.]+}} to i32
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvtss_si64
+// CHECK-LE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"
+// CHECK-BE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"
+// CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 0
+// CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 1
+// CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 2
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvtt_ps2pi
+// CHECK: call i64 @_mm_cvttps_pi32(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_cvtt_ss2si
+// CHECK: call signext i32 @_mm_cvttss_si32(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvttps_pi32
+// CHECK: call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
+// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_cvttss_si32
+// CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: fptosi float %{{[0-9a-zA-Z_.]+}} to i32
+
+// CHECK-LABEL: define available_externally i64 @_mm_cvttss_si64
+// CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: fptosi float %{{[0-9a-zA-Z_.]+}} to i64
 
 void __attribute__((noinline))
 test_div() {
@@ -853,31 +424,14 @@ test_div() {
 
 // CHECK-LABEL: @test_div
 
-// CHECK: define available_externally <4 x float> @_mm_div_ps(<4 x float> noundef [[REG414:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG415:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG414]], <4 x float>* [[REG416:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG415]], <4 x float>* [[REG417:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG418:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG416]], align 16
-// CHECK-NEXT: [[REG419:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG417]], align 16
-// CHECK-NEXT: [[REG420:[0-9a-zA-Z_%.]+]] = fdiv <4 x float> [[REG418]], [[REG419]]
-// CHECK-NEXT: ret <4 x float> [[REG420]]
-
-// CHECK: define available_externally <4 x float> @_mm_div_ss(<4 x float> noundef [[REG421:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG422:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG421]], <4 x float>* [[REG423:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG422]], <4 x float>* [[REG424:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG425:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG423]], align 16
-// CHECK-NEXT: [[REG426:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef [[REG425]], i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG426]], <4 x float>* [[REG427:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG428:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG424]], align 16
-// CHECK-NEXT: [[REG429:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef [[REG428]], i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG429]], <4 x float>* [[REG430:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG431:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG427]], align 16
-// CHECK-NEXT: [[REG432:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG430]], align 16
-// CHECK-NEXT: [[REG433:[0-9a-zA-Z_%.]+]] = fdiv <4 x float> [[REG431]], [[REG432]]
-// CHECK-NEXT: store <4 x float> [[REG433]], <4 x float>* [[REG434:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG435:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG423]], align 16
-// CHECK-NEXT: [[REG436:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG434]], align 16
-// CHECK-NEXT: [[REG437:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef [[REG435]], <4 x float> noundef [[REG436]], <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG437]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_div_ps
+// CHECK: fdiv <4 x float>
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_div_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: fdiv <4 x float>
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
 
 void __attribute__((noinline))
 test_extract() {
@@ -887,23 +441,17 @@ test_extract() {
 
 // CHECK-LABEL: @test_extract
 
-// CHECK: define available_externally signext i32 @_mm_extract_pi16
-// CHECK: [[REG438:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG439:[0-9a-zA-Z_%.]+]] = and i32 [[REG438]], 3
-// CHECK-NEXT: store i32 [[REG439]], i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-BE: sub i32 3, {{[0-9a-zA-Z_%.]+}}
-// CHECK: [[REG440:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK: [[REG441:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK: [[REG442:[0-9a-zA-Z_%.]+]] = mul i32 [[REG441]], 16
-// CHECK: [[REG443:[0-9a-zA-Z_%.]+]] = zext i32 [[REG442]] to i64
-// CHECK-NEXT: [[REG444:[0-9a-zA-Z_%.]+]] = lshr i64 [[REG440]], [[REG443]]
-// CHECK-NEXT: [[REG445:[0-9a-zA-Z_%.]+]] = and i64 [[REG444]], 65535
-// CHECK-NEXT: [[REG446:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG445]] to i32
-// CHECK-NEXT: ret i32 [[REG446]]
-
-// CHECK: define available_externally signext i32 @_m_pextrw
-// CHECK: [[REG447:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_extract_pi16
-// CHECK-NEXT: ret i32 [[REG447]]
+// CHECK-LABEL: define available_externally signext i32 @_mm_extract_pi16
+// CHECK: and i32 %{{[0-9a-zA-Z_.]+}}, 3
+// CHECK-BE: sub i32 3, %{{[0-9a-zA-Z_.]+}}
+// CHECK: %[[MUL:[0-9a-zA-Z_.]+]] = mul i32 %{{[0-9a-zA-Z_.]+}}, 16
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = zext i32 %[[MUL]] to i64
+// CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = lshr i64 %{{[0-9a-zA-Z_.]+}}, %[[EXT]]
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i64 %[[SHR]], 65535
+// CHECK: trunc i64 %[[AND]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_m_pextrw
+// CHECK: call signext i32 @_mm_extract_pi16
 
 void __attribute__((noinline))
 test_insert() {
@@ -913,39 +461,21 @@ test_insert() {
 
 // CHECK-LABEL: @test_insert
 
-// CHECK: define available_externally i64 @_mm_insert_pi16
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG448:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG449:[0-9a-zA-Z_%.]+]] = and i32 [[REG448]], 3
-// CHECK-NEXT: [[REG450:[0-9a-zA-Z_%.]+]] = mul nsw i32 [[REG449]], 16
-// CHECK-NEXT: store i32 [[REG450]], i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG451:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG452:[0-9a-zA-Z_%.]+]] = sext i32 [[REG451]] to i64
-// CHECK-NEXT: [[REG453:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG454:[0-9a-zA-Z_%.]+]] = zext i32 [[REG453]] to i64
-// CHECK-NEXT: [[REG455:[0-9a-zA-Z_%.]+]] = shl i64 [[REG452]], [[REG454]]
-// CHECK-NEXT: store i64 [[REG455]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG456:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG457:[0-9a-zA-Z_%.]+]] = zext i32 [[REG456]] to i64
-// CHECK-NEXT: [[REG458:[0-9a-zA-Z_%.]+]] = shl i64 65535, [[REG457]]
-// CHECK-NEXT: store i64 [[REG458]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG459:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG460:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG461:[0-9a-zA-Z_%.]+]] = xor i64 [[REG460]], -1
-// CHECK-NEXT: [[REG462:[0-9a-zA-Z_%.]+]] = and i64 [[REG459]], [[REG461]]
-// CHECK-NEXT: [[REG463:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG464:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG465:[0-9a-zA-Z_%.]+]] = and i64 [[REG463]], [[REG464]]
-// CHECK-NEXT: [[REG466:[0-9a-zA-Z_%.]+]] = or i64 [[REG462]], [[REG465]]
-// CHECK-NEXT: store i64 [[REG466]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG467:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: ret i64 [[REG467]]
-
-// CHECK: define available_externally i64 @_m_pinsrw
-// CHECK: [[REG468:[0-9a-zA-Z_%.]+]] = call i64 @_mm_insert_pi16
-// CHECK-NEXT: ret i64 [[REG468]]
+// CHECK-LABEL: define available_externally i64 @_mm_insert_pi16
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
+// CHECK: mul nsw i32 %[[AND]], 16
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: %[[EXT2:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: shl i64 %[[EXT]], %[[EXT2]]
+// CHECK: %[[EXT3:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
+// CHECK: shl i64 65535, %[[EXT3]]
+// CHECK: %[[XOR:[0-9a-zA-Z_.]+]] = xor i64 %{{[0-9a-zA-Z_.]+}}, -1
+// CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i64 %{{[0-9a-zA-Z_.]+}}, %[[XOR]]
+// CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i64 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
+// CHECK: or i64 %[[AND2]], %[[AND3]]
+
+// CHECK-LABEL: define available_externally i64 @_m_pinsrw
+// CHECK: call i64 @_mm_insert_pi16
 
 void __attribute__((noinline))
 test_load() {
@@ -961,59 +491,34 @@ test_load() {
 
 // CHECK-LABEL: @test_load
 
-// CHECK: define available_externally <4 x float> @_mm_load_ps
-// CHECK: [[REG469:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_ld(long, float vector[4] const*)
-// CHECK-NEXT: ret <4 x float> [[REG469]]
-
-// CHECK: define available_externally <4 x float> @_mm_load_ps1
-// CHECK: [[REG470:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_load1_ps
-// CHECK-NEXT: ret <4 x float> [[REG470]]
-
-// CHECK: define available_externally <4 x float> @_mm_load_ss
-// CHECK: [[REG471:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set_ss
-// CHECK-NEXT: ret <4 x float> [[REG471]]
-
-// CHECK: define available_externally <4 x float> @_mm_load1_ps
-// CHECK: [[REG472:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set1_ps
-// CHECK-NEXT: ret <4 x float> [[REG472]]
-
-// CHECK: define available_externally <4 x float> @_mm_loadh_pi
-// CHECK: [[REG473:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: store <2 x i64> [[REG473]], <2 x i64>* [[REG474:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG475:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG474]], align 16
-// CHECK-NEXT: [[REG476:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG475]], i32 1
-// CHECK-NEXT: [[REG477:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG478:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG479:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG477]], i64 [[REG476]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG479]], <2 x i64>* [[REG478]], align 16
-// CHECK-NEXT: [[REG480:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG478]], align 16
-// CHECK-NEXT: [[REG481:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG480]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG481]]
-
-// CHECK: define available_externally <4 x float> @_mm_loadl_pi
-// CHECK: [[REG482:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: store <2 x i64> [[REG482]], <2 x i64>* [[REG483:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG484:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG483]], align 16
-// CHECK-NEXT: [[REG485:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG484]], i32 0
-// CHECK-NEXT: [[REG486:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG487:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG488:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG486]], i64 [[REG485]], i32 0
-// CHECK-NEXT: store <2 x i64> [[REG488]], <2 x i64>* [[REG487]], align 16
-// CHECK-NEXT: [[REG489:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG487]], align 16
-// CHECK-NEXT: [[REG490:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG489]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG490]]
-
-// CHECK: define available_externally <4 x float> @_mm_loadr_ps
-// CHECK: [[REG491:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_ld(long, float vector[4] const*)
-// CHECK-NEXT: store <4 x float> [[REG491]], <4 x float>* [[REG492:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG493:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG492]], align 16
-// CHECK-NEXT: [[REG494:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG492]], align 16
-// CHECK-NEXT: [[REG495:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> noundef [[REG493]], <4 x float> noundef [[REG494]], <16 x i8> noundef <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
-// CHECK-NEXT: store <4 x float> [[REG495]], <4 x float>* [[REG496:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG497:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG496]], align 16
-// CHECK-NEXT: ret <4 x float> [[REG497]]
-
-// CHECK: define available_externally <4 x float> @_mm_loadu_ps
-// CHECK: [[REG498:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vsx_ld(int, float const*)
-// CHECK-NEXT: ret <4 x float> [[REG498]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_load_ps
+// CHECK: call <4 x float> @vec_ld(long, float vector[4] const*)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_load_ps1
+// CHECK: call <4 x float> @_mm_load1_ps
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_load_ss
+// CHECK: call <4 x float> @_mm_set_ss
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_load1_ps
+// CHECK: call <4 x float> @_mm_set1_ps
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_loadh_pi
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
+// CHECK: insertelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i64 %[[VAL]], i32 1
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_loadl_pi
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: insertelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i64 %[[EXT]], i32 0
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_loadr_ps
+// CHECK: call <4 x float> @vec_ld(long, float vector[4] const*)
+// CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_loadu_ps
+// CHECK: call <4 x float> @vec_vsx_ld(int, float const*)
 
 void __attribute__((noinline))
 test_logic() {
@@ -1025,37 +530,17 @@ test_logic() {
 
 // CHECK-LABEL: @test_logic
 
-// CHECK: define available_externally <4 x float> @_mm_or_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG499:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG500:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG501:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_or(float vector[4], float vector[4])(<4 x float> noundef [[REG499]], <4 x float> noundef [[REG500]])
-// CHECK-NEXT: ret <4 x float> [[REG501]]
-
-// CHECK: define available_externally <4 x float> @_mm_and_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG502:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG503:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG504:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_and(float vector[4], float vector[4])(<4 x float> noundef [[REG502]], <4 x float> noundef [[REG503]])
-// CHECK-NEXT: ret <4 x float> [[REG504]]
-
-// CHECK: define available_externally <4 x float> @_mm_andnot_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG505:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG506:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG507:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_andc(float vector[4], float vector[4])(<4 x float> noundef [[REG505]], <4 x float> noundef [[REG506]])
-// CHECK-NEXT: ret <4 x float> [[REG507]]
-
-// CHECK: define available_externally <4 x float> @_mm_xor_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG508:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG509:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG510:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_xor(float vector[4], float vector[4])(<4 x float> noundef [[REG508]], <4 x float> noundef [[REG509]])
-// CHECK-NEXT: ret <4 x float> [[REG510]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_or_ps
+// CHECK: call <4 x float> @vec_or(float vector[4], float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_and_ps
+// CHECK: call <4 x float> @vec_and(float vector[4], float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_andnot_ps
+// CHECK: call <4 x float> @vec_andc(float vector[4], float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_xor_ps
+// CHECK: call <4 x float> @vec_xor(float vector[4], float vector[4])
 
 void __attribute__((noinline))
 test_max() {
@@ -1067,75 +552,31 @@ test_max() {
 
 // CHECK-LABEL: @test_max
 
-// CHECK: define available_externally <4 x float> @_mm_max_ps(<4 x float> noundef [[REG511:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG512:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG511]], <4 x float>* [[REG513:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG512]], <4 x float>* [[REG514:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG515:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG513]], align 16
-// CHECK-NEXT: [[REG516:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG514]], align 16
-// CHECK-NEXT: [[REG517:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> noundef [[REG515]], <4 x float> noundef [[REG516]])
-// CHECK-NEXT: store <4 x i32> [[REG517]], <4 x i32>* [[REG518:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG519:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG514]], align 16
-// CHECK-NEXT: [[REG520:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG513]], align 16
-// CHECK-NEXT: [[REG521:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG518]], align 16
-// CHECK-NEXT: [[REG522:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])(<4 x float> noundef [[REG519]], <4 x float> noundef [[REG520]], <4 x i32> noundef [[REG521]])
-// CHECK-NEXT: ret <4 x float> [[REG522]]
-
-// CHECK: define available_externally <4 x float> @_mm_max_ss
-// CHECK: [[REG523:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG523]], <4 x float>* [[REG524:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG525:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG525]], <4 x float>* [[REG526:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG527:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG524]], align 16
-// CHECK-NEXT: [[REG528:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG526]], align 16
-// CHECK-NEXT: call <4 x float> @vec_max(float vector[4], float vector[4])(<4 x float> noundef [[REG527]], <4 x float> noundef [[REG528]])
-// CHECK: [[REG529:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG529]]
-
-// CHECK: define available_externally i64 @_mm_max_pi16
-// CHECK: [[REG530:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG531:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG530]])
-// CHECK-NEXT: [[REG532:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG531]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG532]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG533:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG534:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG533]])
-// CHECK-NEXT: [[REG535:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG534]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG535]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG536:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG537:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG538:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])(<8 x i16> noundef [[REG536]], <8 x i16> noundef [[REG537]])
-// CHECK-NEXT: store <8 x i16> [[REG538]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG539:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG540:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG541:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG542:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])(<8 x i16> noundef [[REG539]], <8 x i16> noundef [[REG540]], <8 x i16> noundef [[REG541]])
-// CHECK-NEXT: store <8 x i16> [[REG542]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG543:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG544:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG543]] to <2 x i64>
-// CHECK-NEXT: [[REG545:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG544]], i32 0
-// CHECK-NEXT: ret i64 [[REG545]]
-
-// CHECK: define available_externally i64 @_mm_max_pu8
-// CHECK: [[REG546:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG547:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG546]])
-// CHECK-NEXT: [[REG548:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG547]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG548]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG549:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG550:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG549]])
-// CHECK-NEXT: [[REG551:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG550]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG551]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG552:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG553:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG554:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG552]], <16 x i8> noundef [[REG553]])
-// CHECK-NEXT: store <16 x i8> [[REG554]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG555:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG556:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG557:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG558:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> noundef [[REG555]], <16 x i8> noundef [[REG556]], <16 x i8> noundef [[REG557]])
-// CHECK-NEXT: store <16 x i8> [[REG558]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG559:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG560:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG559]] to <2 x i64>
-// CHECK-NEXT: [[REG561:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG560]], i32 0
-// CHECK-NEXT: ret i64 [[REG561]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_max_ps
+// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_max_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x float> @vec_max(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally i64 @_mm_max_pi16
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])
+// CHECK: call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0
+
+// CHECK-LABEL: define available_externally i64 @_mm_max_pu8
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16])
+// CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0
 
 void __attribute__((noinline))
 test_alt_name_max() {
@@ -1145,13 +586,11 @@ test_alt_name_max() {
 
 // CHECK-LABEL: @test_alt_name_max
 
-// CHECK: define available_externally i64 @_m_pmaxsw
-// CHECK: [[REG562:[0-9a-zA-Z_%.]+]] = call i64 @_mm_max_pi16
-// CHECK-NEXT: ret i64 [[REG562]]
+// CHECK-LABEL: define available_externally i64 @_m_pmaxsw
+// CHECK: call i64 @_mm_max_pi16
 
-// CHECK: define available_externally i64 @_m_pmaxub
-// CHECK: [[REG563:[0-9a-zA-Z_%.]+]] = call i64 @_mm_max_pu8
-// CHECK-NEXT: ret i64 [[REG563]]
+// CHECK-LABEL: define available_externally i64 @_m_pmaxub
+// CHECK: call i64 @_mm_max_pu8
 
 void __attribute__((noinline))
 test_min() {
@@ -1163,91 +602,45 @@ test_min() {
 
 // CHECK-LABEL: @test_min
 
-// CHECK: define available_externally <4 x float> @_mm_min_ps(<4 x float> noundef [[REG517:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG518:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG517]], <4 x float>* [[REG564:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG518]], <4 x float>* [[REG565:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG566:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG565]], align 16
-// CHECK-NEXT: [[REG567:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG564]], align 16
-// CHECK-NEXT: [[REG568:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> noundef [[REG566]], <4 x float> noundef [[REG567]])
-// CHECK-NEXT: store <4 x i32> [[REG568]], <4 x i32>* [[REG569:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG570:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG565]], align 16
-// CHECK-NEXT: [[REG571:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG564]], align 16
-// CHECK-NEXT: [[REG572:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG569]], align 16
-// CHECK-NEXT: [[REG573:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])(<4 x float> noundef [[REG570]], <4 x float> noundef [[REG571]], <4 x i32> noundef [[REG572]])
-// CHECK-NEXT: ret <4 x float> [[REG573]]
-
-// CHECK: define available_externally <4 x float> @_mm_min_ss
-// CHECK: [[REG574:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG574]], <4 x float>* [[REG575:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG576:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, {{i32|i32 noundef zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG576]], <4 x float>* [[REG577:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG578:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG575]], align 16
-// CHECK-NEXT: [[REG579:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG577]], align 16
-// CHECK-NEXT: call <4 x float> @vec_min(float vector[4], float vector[4])(<4 x float> noundef [[REG578]], <4 x float> noundef [[REG579]])
-// CHECK: [[REG580:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x float> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG580]]
-
-// CHECK: define available_externally i64 @_mm_min_pi16
-// CHECK: [[REG581:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG582:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG581]])
-// CHECK-NEXT: [[REG583:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG582]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG583]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG584:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG585:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG584]])
-// CHECK-NEXT: [[REG586:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG585]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG586]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG587:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG588:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG589:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmplt(short vector[8], short vector[8])(<8 x i16> noundef [[REG587]], <8 x i16> noundef [[REG588]])
-// CHECK-NEXT: store <8 x i16> [[REG589]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG590:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG591:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG592:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG593:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])(<8 x i16> noundef [[REG590]], <8 x i16> noundef [[REG591]], <8 x i16> noundef [[REG592]])
-// CHECK-NEXT: store <8 x i16> [[REG593]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG594:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG595:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG594]] to <2 x i64>
-// CHECK-NEXT: [[REG596:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG595]], i32 0
-// CHECK-NEXT: ret i64 [[REG596]]
-
-// CHECK: define available_externally i64 @_mm_min_pu8
-// CHECK: [[REG597:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG598:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG597]])
-// CHECK-NEXT: [[REG599:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG598]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG599]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG600:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG601:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG600]])
-// CHECK-NEXT: [[REG602:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG601]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG602]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG603:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG604:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG605:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmplt(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG603]], <16 x i8> noundef [[REG604]])
-// CHECK-NEXT: store <16 x i8> [[REG605]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG606:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG607:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG608:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG609:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> noundef [[REG606]], <16 x i8> noundef [[REG607]], <16 x i8> noundef [[REG608]])
-// CHECK-NEXT: store <16 x i8> [[REG609]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG610:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG611:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG610]] to <2 x i64>
-// CHECK-NEXT: [[REG612:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG611]], i32 0
-// CHECK-NEXT: ret i64 [[REG612]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_min_ps
+// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_min_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
+// CHECK: call <4 x float> @vec_min(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally i64 @_mm_min_pi16
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <8 x i16> @vec_cmplt(short vector[8], short vector[8])
+// CHECK: call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0
+
+// CHECK-LABEL: define available_externally i64 @_mm_min_pu8
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <16 x i8> @vec_cmplt(unsigned char vector[16], unsigned char vector[16])
+// CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: extractelement <2 x i64> %[[CAST]], i32 0
 
 void __attribute__((noinline))
 test_alt_name_min() {
   res64 = _m_pminsw(ms[0], ms[1]);
-  res64 = _m_pminub(ms[0], ms[1]);
+    res64 = _m_pminub(ms[0], ms[1]);
 }
 
 // CHECK-LABEL: @test_alt_name_min
 
-// CHECK: define available_externally i64 @_m_pminsw
-// CHECK: [[REG613:[0-9a-zA-Z_%.]+]] = call i64 @_mm_min_pi16
-// CHECK-NEXT: ret i64 [[REG613]]
+// CHECK-LABEL: define available_externally i64 @_m_pminsw
+// CHECK: call i64 @_mm_min_pi16
 
-// CHECK: define available_externally i64 @_m_pminub
-// CHECK: [[REG614:[0-9a-zA-Z_%.]+]] = call i64 @_mm_min_pu8
-// CHECK-NEXT: ret i64 [[REG614]]
+// CHECK-LABEL: define available_externally i64 @_m_pminub
+// CHECK: call i64 @_mm_min_pu8
 
 void __attribute__((noinline))
 test_move() {
@@ -1261,89 +654,36 @@ test_move() {
 
 // CHECK-LABEL: @test_move
 
-// CHECK: define available_externally void @_mm_maskmove_si64
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i8* {{[0-9a-zA-Z_%.]+}}, i8** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 -9187201950435737472, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG615:[0-9a-zA-Z_%.]+]] = load i8*, i8** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG616:[0-9a-zA-Z_%.]+]] = bitcast i8* [[REG615]] to i64*
-// CHECK-NEXT: store i64* [[REG616]], i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG617:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG618:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG617]], align 8
-// CHECK-NEXT: store i64 [[REG618]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG619:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG620:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG621:[0-9a-zA-Z_%.]+]] = and i64 [[REG619]], [[REG620]]
-// CHECK-NEXT: [[REG622:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG623:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi8(i64 noundef [[REG621]], i64 noundef [[REG622]])
-// CHECK-NEXT: store i64 [[REG623]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG624:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG625:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG626:[0-9a-zA-Z_%.]+]] = xor i64 [[REG625]], -1
-// CHECK-NEXT: [[REG627:[0-9a-zA-Z_%.]+]] = and i64 [[REG624]], [[REG626]]
-// CHECK-NEXT: [[REG628:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG629:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG630:[0-9a-zA-Z_%.]+]] = and i64 [[REG628]], [[REG629]]
-// CHECK-NEXT: [[REG631:[0-9a-zA-Z_%.]+]] = or i64 [[REG627]], [[REG630]]
-// CHECK-NEXT: store i64 [[REG631]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG632:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG633:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 [[REG632]], i64* [[REG633]], align 8
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally <4 x float> @_mm_move_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG634:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG635:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG636:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef [[REG634]], <4 x float> noundef [[REG635]], <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG636]]
-
-// CHECK: define available_externally <4 x float> @_mm_movehl_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG637:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG638:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG637]] to <2 x i64>
-// CHECK-NEXT: [[REG639:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG640:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG639]] to <2 x i64>
-// CHECK-NEXT: [[REG641:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef [[REG638]], <2 x i64> noundef [[REG640]])
-// CHECK-NEXT: [[REG642:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG641]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG642]]
-
-// CHECK: define available_externally <4 x float> @_mm_movelh_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG643:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG644:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG643]] to <2 x i64>
-// CHECK-NEXT: [[REG645:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG646:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG645]] to <2 x i64>
-// CHECK-NEXT: [[REG647:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef [[REG644]], <2 x i64> noundef [[REG646]])
-// CHECK-NEXT: [[REG648:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG647]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG648]]
-
-// CHECK: define available_externally signext i32 @_mm_movemask_pi8
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-LE-NEXT: store i64 2269495618449464, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-BE-NEXT: store i64 4048780183313844224, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG649:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG650:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG651:[0-9a-zA-Z_%.]+]] = call i64 @llvm.ppc.bpermd(i64 [[REG649]], i64 [[REG650]])
-// CHECK-NEXT: [[REG652:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG651]] to i32
-// CHECK-NEXT: ret i32 [[REG652]]
-
-// CHECK: define available_externally signext i32 @_mm_movemask_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG653:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG654:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG653]] to <16 x i8>
-// CHECK-LE-NEXT: [[REG655:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG654]], <16 x i8> noundef bitcast (<4 x i32> <i32 2113632, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>))
-// CHECK-BE-NEXT: [[REG655:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG654]], <16 x i8> noundef bitcast (<4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 2113632> to <16 x i8>))
-// CHECK-NEXT: store <2 x i64> [[REG655]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG656:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-LE-NEXT: [[REG657:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG656]], i32 1
-// CHECK-BE-NEXT: [[REG657:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG656]], i32 0
-// CHECK-NEXT: [[REG658:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG657]] to i32
-// CHECK-NEXT: ret i32 [[REG658]]
+// CHECK-LABEL: define available_externally void @_mm_maskmove_si64
+// CHECK: store i64 -9187201950435737472, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i64
+// CHECK: call i64 @_mm_cmpeq_pi8(i64 noundef %[[AND]], i64 noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: %[[XOR:[0-9a-zA-Z_.]+]] = xor i64 %{{[0-9a-zA-Z_.]+}}, -1
+// CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i64 %{{[0-9a-zA-Z_.]+}}, %[[XOR]]
+// CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i64
+// CHECK: or i64 %[[AND2]], %[[AND3]]
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_move_ss
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_movehl_ps
+// CHECK: call <2 x i64> @vec_mergel(unsigned long long vector[2], unsigned long long vector[2])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_movelh_ps
+// CHECK: call <2 x i64> @vec_mergeh(unsigned long long vector[2], unsigned long long vector[2])
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_movemask_pi8
+// CHECK-LE: store i64 2269495618449464, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK-BE: store i64 4048780183313844224, i64* %{{[0-9a-zA-Z_.]+}}, align 8
+// CHECK: %[[CALL:[0-9a-zA-Z_.]+]] = call i64 @llvm.ppc.bpermd
+// CHECK: trunc i64 %[[CALL]] to i32
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_movemask_ps
+// CHECK-LE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 2113632, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>))
+// CHECK-LE: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
+// CHECK-BE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 2113632> to <16 x i8>))
+// CHECK-BE: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: trunc i64 %[[EXT]] to i32
 
 void __attribute__((noinline))
 test_alt_name_move() {
@@ -1353,13 +693,11 @@ test_alt_name_move() {
 
 // CHECK-LABEL: @test_alt_name_move
 
-// CHECK: define available_externally signext i32 @_m_pmovmskb
-// CHECK: [[REG659:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_movemask_pi8
-// CHECK-NEXT: ret i32 [[REG659]]
+// CHECK-LABEL: define available_externally signext i32 @_m_pmovmskb
+// CHECK: call signext i32 @_mm_movemask_pi8
 
-// CHECK: define available_externally void @_m_maskmovq
+// CHECK-LABEL: define available_externally void @_m_maskmovq
 // CHECK: call void @_mm_maskmove_si64
-// CHECK-NEXT: ret void
 
 void __attribute__((noinline))
 test_mul() {
@@ -1371,71 +709,27 @@ test_mul() {
 
 // CHECK-LABEL: @test_mul
 
-// CHECK: define available_externally <4 x float> @_mm_mul_ps(<4 x float> noundef [[REG660:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG661:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG660]], <4 x float>* [[REG662:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG661]], <4 x float>* [[REG663:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG664:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG662]], align 16
-// CHECK-NEXT: [[REG665:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG663]], align 16
-// CHECK-NEXT: [[REG666:[0-9a-zA-Z_%.]+]] = fmul <4 x float> [[REG664]], [[REG665]]
-// CHECK-NEXT: ret <4 x float> [[REG666]]
-
-// CHECK: define available_externally <4 x float> @_mm_mul_ss(<4 x float> noundef [[REG667:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG668:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG667]], <4 x float>* [[REG669:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG668]], <4 x float>* [[REG670:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG671:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG669]], align 16
-// CHECK-NEXT: [[REG672:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef [[REG671]], i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG672]], <4 x float>* [[REG673:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG674:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG670]], align 16
-// CHECK-NEXT: [[REG675:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef [[REG674]], i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG675]], <4 x float>* [[REG676:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG677:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG673]], align 16
-// CHECK-NEXT: [[REG678:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG676]], align 16
-// CHECK-NEXT: [[REG679:[0-9a-zA-Z_%.]+]] = fmul <4 x float> [[REG677]], [[REG678]]
-// CHECK-NEXT: store <4 x float> [[REG679]], <4 x float>* [[REG680:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG681:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG669]], align 16
-// CHECK-NEXT: [[REG682:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG680]], align 16
-// CHECK-NEXT: [[REG683:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef [[REG681]], <4 x float> noundef [[REG682]], <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG683]]
-
-// CHECK: define available_externally i64 @_mm_mulhi_pu16(i64 noundef [[REG684:[0-9a-zA-Z_%.]+]], i64 noundef [[REG685:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG684]], i64* [[REG686:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG685]], i64* [[REG687:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-LE-NEXT: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-BE-NEXT: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG688:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG686]], align 8
-// CHECK-NEXT: [[REG689:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG688]])
-// CHECK-NEXT: [[REG690:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG689]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG690]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG691:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG687]], align 8
-// CHECK-NEXT: [[REG692:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG691]])
-// CHECK-NEXT: [[REG693:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG692]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG693]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG694:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG695:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG696:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmuleuh(<8 x i16> noundef [[REG694]], <8 x i16> noundef [[REG695]])
-// CHECK-NEXT: store <4 x i32> [[REG696]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG697:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG698:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG699:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulouh(<8 x i16> noundef [[REG697]], <8 x i16> noundef [[REG698]])
-// CHECK-NEXT: store <4 x i32> [[REG699]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG700:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG701:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG702:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG703:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16])(<4 x i32> noundef [[REG700]], <4 x i32> noundef [[REG701]], <16 x i8> noundef [[REG702]])
-// CHECK-NEXT: [[REG704:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG703]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG704]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG705:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG706:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG705]] to <2 x i64>
-// CHECK-NEXT: [[REG707:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG706]], i32 0
-// CHECK-NEXT: ret i64 [[REG707]]
-
-// CHECK: define available_externally i64 @_m_pmulhuw(i64 noundef [[REG708:[0-9a-zA-Z_%.]+]], i64 noundef [[REG709:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG708]], i64* [[REG710:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG709]], i64* [[REG711:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG712:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG710]], align 8
-// CHECK-NEXT: [[REG713:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG711]], align 8
-// CHECK-NEXT: [[REG714:[0-9a-zA-Z_%.]+]] = call i64 @_mm_mulhi_pu16(i64 noundef [[REG712]], i64 noundef [[REG713]])
-// CHECK-NEXT: ret i64 [[REG714]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_mul_ps
+// CHECK: fmul <4 x float>
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_mul_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: fmul <4 x float>
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
+
+// CHECK-LABEL: define available_externally i64 @_mm_mulhi_pu16
+// CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>, <16 x i8>* %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <4 x i32> @vec_vmuleuh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_vmulouh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16])
+// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally i64 @_m_pmulhuw
+// CHECK: call i64 @_mm_mulhi_pu16
 
 void __attribute__((noinline))
 test_prefetch() {
@@ -1444,12 +738,8 @@ test_prefetch() {
 
 // CHECK-LABEL: @test_prefetch
 
-// CHECK: define available_externally void @_mm_prefetch
-// CHECK: store i8* {{[0-9a-zA-Z_%.]+}}, i8** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG715:[0-9a-zA-Z_%.]+]] = load i8*, i8** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: call void @llvm.prefetch.p0i8(i8* [[REG715]], i32 0, i32 3, i32 1)
-// CHECK-NEXT: ret void
+// CHECK-LABEL: define available_externally void @_mm_prefetch
+// CHECK: call void @llvm.prefetch.p0i8(i8* %{{[0-9a-zA-Z_.]+}}, i32 0, i32 3, i32 1)
 
 void __attribute__((noinline))
 test_rcp() {
@@ -1459,24 +749,13 @@ test_rcp() {
 
 // CHECK-LABEL: @test_rcp
 
-// CHECK: define available_externally <4 x float> @_mm_rcp_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG716:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG717:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_re(float vector[4])(<4 x float> noundef [[REG716]])
-// CHECK-NEXT: ret <4 x float> [[REG717]]
-
-// CHECK: define available_externally <4 x float> @_mm_rcp_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG718:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG719:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef [[REG718]], i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG719]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG720:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG721:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_rcp_ps(<4 x float> noundef [[REG720]])
-// CHECK-NEXT: store <4 x float> [[REG721]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG722:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG723:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG724:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef [[REG722]], <4 x float> noundef [[REG723]], <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG724]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_rcp_ps
+// CHECK: call <4 x float> @vec_re(float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_rcp_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)
+// CHECK: call <4 x float> @_mm_rcp_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
 
 void __attribute__((noinline))
 test_rsqrt() {
@@ -1486,20 +765,13 @@ test_rsqrt() {
 
 // CHECK-LABEL: @test_rsqrt
 
-// CHECK: define available_externally <4 x float> @_mm_rsqrt_ps
-// CHECK: [[REG725:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rsqrte(float vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: ret <4 x float> [[REG725]]
-
-// CHECK: define available_externally <4 x float> @_mm_rsqrt_ss
-// CHECK: [[REG726:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG726]], <4 x float>* [[REG727:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG728:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG727]], align 16
-// CHECK-NEXT: [[REG729:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rsqrte(float vector[4])(<4 x float> noundef [[REG728]])
-// CHECK-NEXT: store <4 x float> [[REG729]], <4 x float>* [[REG730:[0-9a-zA_Z_%.]+]], align 16
-// CHECK-NEXT: [[REG731:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG732:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG730]], align 16
-// CHECK-NEXT: [[REG733:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef [[REG731]], <4 x float> noundef [[REG732]], <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG733]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_rsqrt_ps
+// CHECK: call <4 x float> @vec_rsqrte(float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_rsqrt_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: call <4 x float> @vec_rsqrte(float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
 
 void __attribute__((noinline))
 test_sad() {
@@ -1509,55 +781,23 @@ test_sad() {
 
 // CHECK-LABEL: @test_sad
 
-// CHECK: define available_externally i64 @_mm_sad_pu8(i64 noundef [[REG734:[0-9a-zA-Z_%.]+]], i64 noundef [[REG735:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG734]], i64* [[REG736:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG735]], i64* [[REG737:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG738:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[REG738]], i8 0, i64 8, i1 false)
-// CHECK-NEXT: [[REG739:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG736]], align 8
-// CHECK-NEXT: [[REG740:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[REG739]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG740]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG741:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG742:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG741]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG742]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG743:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG737]], align 8
-// CHECK-NEXT: [[REG744:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[REG743]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG744]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG745:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG746:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG745]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG746]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG747:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG748:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG749:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG747]], <16 x i8> noundef [[REG748]])
-// CHECK-NEXT: store <16 x i8> [[REG749]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG750:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG751:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG752:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG750]], <16 x i8> noundef [[REG751]])
-// CHECK-NEXT: store <16 x i8> [[REG752]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG753:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG754:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG755:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef [[REG753]], <16 x i8> noundef [[REG754]])
-// CHECK-NEXT: store <16 x i8> [[REG755]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG756:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG757:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> noundef [[REG756]], <4 x i32> noundef zeroinitializer)
-// CHECK-NEXT: store <4 x i32> [[REG757]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG758:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG759:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sums(<4 x i32> noundef [[REG758]], <4 x i32> noundef zeroinitializer)
-// CHECK-NEXT: store <4 x i32> [[REG759]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG760:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG761:[0-9a-zA-Z_%.]+]] = extractelement <4 x i32> [[REG760]], i32 3
-// CHECK-NEXT: [[REG762:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG761]] to i16
-// CHECK-NEXT: [[REG763:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
-// CHECK-NEXT: [[REG764:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG763]], i64 0, i64 0
-// CHECK-NEXT: store i16 [[REG762]], i16* [[REG764]], align 8
-// CHECK-NEXT: [[REG765:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i64*
-// CHECK-NEXT: [[REG766:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG765]], align 8
-// CHECK-NEXT: ret i64 [[REG766]]
-
-// CHECK: define available_externally i64 @_m_psadbw
-// CHECK: [[REG767:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sad_pu8
-// CHECK-NEXT: ret i64 [[REG767]]
+// CHECK-LABEL: define available_externally i64 @_mm_sad_pu8
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %{{[0-9a-zA-Z_.]+}}, i8 0, i64 8, i1 false)
+// CHECK: insertelement <2 x i64> <i64 0, i64 undef>, i64 %{{[0-9a-zA-Z_.]+}}, i32 1
+// CHECK: insertelement <2 x i64> <i64 0, i64 undef>, i64 %{{[0-9a-zA-Z_.]+}}, i32 1
+// CHECK: call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])
+// CHECK: call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])
+// CHECK: call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16])
+// CHECK: call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
+// CHECK: call <4 x i32> @vec_sums(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
+// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 3
+// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[EXT]] to i16
+// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast %{{[0-9a-zA-Z_.]+}}* %{{[0-9a-zA-Z_.]+}} to [4 x i16]*
+// CHECK: %[[GEP:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* %[[CAST]], i64 0, i64 0
+// CHECK: store i16 %[[TRUNC]], i16* %[[GEP]], align 8
+
+// CHECK-LABEL: define available_externally i64 @_m_psadbw
+// CHECK: call i64 @_mm_sad_pu8
 
 void __attribute__((noinline))
 test_set() {
@@ -1570,70 +810,36 @@ test_set() {
 
 // CHECK-LABEL: @test_set
 
-// CHECK: define available_externally <4 x float> @_mm_set_ps(float noundef [[REG768:[0-9a-zA-Z_%.]+]], float noundef [[REG769:[0-9a-zA-Z_%.]+]], float noundef [[REG770:[0-9a-zA-Z_%.]+]], float noundef [[REG771:[0-9a-zA-Z_%.]+]])
-// CHECK: store float [[REG768]], float* [[REG772:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store float [[REG769]], float* [[REG773:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store float [[REG770]], float* [[REG774:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store float [[REG771]], float* [[REG775:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG776:[0-9a-zA-Z_%.]+]] = load float, float* [[REG775]], align 4
-// CHECK-NEXT: [[REG777:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG776]], i32 0
-// CHECK-NEXT: [[REG778:[0-9a-zA-Z_%.]+]] = load float, float* [[REG774]], align 4
-// CHECK-NEXT: [[REG779:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG777]], float [[REG778]], i32 1
-// CHECK-NEXT: [[REG780:[0-9a-zA-Z_%.]+]] = load float, float* [[REG773]], align 4
-// CHECK-NEXT: [[REG781:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG779]], float [[REG780]], i32 2
-// CHECK-NEXT: [[REG782:[0-9a-zA-Z_%.]+]] = load float, float* [[REG772]], align 4
-// CHECK-NEXT: [[REG783:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG781]], float [[REG782]], i32 3
-// CHECK-NEXT: store <4 x float> [[REG783]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG784:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG784]]
-
-// CHECK: define available_externally <4 x float> @_mm_set_ps1(float noundef [[REG785:[0-9a-zA-Z_%.]+]])
-// CHECK: store float [[REG785]], float* [[REG786:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG787:[0-9a-zA-Z_%.]+]] = load float, float* [[REG786]], align 4
-// CHECK-NEXT: [[REG788:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set1_ps(float noundef [[REG787]])
-// CHECK-NEXT: ret <4 x float> [[REG788]]
-
-// CHECK: define available_externally <4 x float> @_mm_set_ss(float noundef [[REG789:[0-9a-zA-Z_%.]+]])
-// CHECK: store float [[REG789:[0-9a-zA-Z_%.]+]], float* [[REG790:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG791:[0-9a-zA-Z_%.]+]] = load float, float* [[REG790]], align 4
-// CHECK-NEXT: [[REG792:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG791]], i32 0
-// CHECK-NEXT: [[REG793:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG792]], float 0.000000e+00, i32 1
-// CHECK-NEXT: [[REG794:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG793]], float 0.000000e+00, i32 2
-// CHECK-NEXT: [[REG795:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG794]], float 0.000000e+00, i32 3
-// CHECK-NEXT: store <4 x float> [[REG795]], <4 x float>* [[REG796:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG797:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG796]], align 16
-// CHECK-NEXT: ret <4 x float> [[REG797]]
-
-// CHECK: define available_externally <4 x float> @_mm_set1_ps(float noundef [[REG798:[0-9a-zA-Z_%.]+]])
-// CHECK: store float [[REG798]], float* [[REG799:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG800:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
-// CHECK-NEXT: [[REG801:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG800]], i32 0
-// CHECK-NEXT: [[REG802:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
-// CHECK-NEXT: [[REG803:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG801]], float [[REG802]], i32 1
-// CHECK-NEXT: [[REG804:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
-// CHECK-NEXT: [[REG805:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG803]], float [[REG804]], i32 2
-// CHECK-NEXT: [[REG806:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
-// CHECK-NEXT: [[REG807:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG805]], float [[REG806]], i32 3
-// CHECK-NEXT: store <4 x float> [[REG807]], <4 x float>* [[REG808:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG809:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG808]], align 16
-// CHECK-NEXT: ret <4 x float> [[REG809]]
-
-// CHECK: define available_externally <4 x float> @_mm_setr_ps(float noundef [[REG810:[0-9a-zA-Z_%.]+]], float noundef [[REG811:[0-9a-zA-Z_%.]+]], float noundef [[REG812:[0-9a-zA-Z_%.]+]], float noundef [[REG813:[0-9a-zA-Z_%.]+]])
-// CHECK: store float [[REG810]], float* [[REG814:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store float [[REG811]], float* [[REG815:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store float [[REG812]], float* [[REG816:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store float [[REG813]], float* [[REG817:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG818:[0-9a-zA-Z_%.]+]] = load float, float* [[REG814]], align 4
-// CHECK-NEXT: [[REG819:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG818]], i32 0
-// CHECK-NEXT: [[REG820:[0-9a-zA-Z_%.]+]] = load float, float* [[REG815]], align 4
-// CHECK-NEXT: [[REG821:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG819]], float [[REG820]], i32 1
-// CHECK-NEXT: [[REG822:[0-9a-zA-Z_%.]+]] = load float, float* [[REG816]], align 4
-// CHECK-NEXT: [[REG823:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG821]], float [[REG822]], i32 2
-// CHECK-NEXT: [[REG824:[0-9a-zA-Z_%.]+]] = load float, float* [[REG817]], align 4
-// CHECK-NEXT: [[REG825:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG823]], float [[REG824]], i32 3
-// CHECK-NEXT: store <4 x float> [[REG825]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG826:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG826]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_set_ps
+// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> undef, float %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1
+// CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2
+// CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3
+// CHECK: store <4 x float> %[[VEC4]], <4 x float>* %{{[0-9a-zA-Z_.]+}}, align 16
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_set_ps1
+// CHECK: call <4 x float> @_mm_set1_ps
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_set_ss
+// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> undef, float %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float 0.000000e+00, i32 1
+// CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float 0.000000e+00, i32 2
+// CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float 0.000000e+00, i32 3
+// CHECK: store <4 x float> %[[VEC4]], <4 x float>* %{{[0-9a-zA-Z_.]+}}, align 16
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_set1_ps
+// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> undef, float %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1
+// CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2
+// CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3
+// CHECK: store <4 x float> %[[VEC4]], <4 x float>* %{{[0-9a-zA-Z_.]+}}, align 16
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_setr_ps
+// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> undef, float %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1
+// CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2
+// CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3
+// CHECK: store <4 x float> %[[VEC4]], <4 x float>* %{{[0-9a-zA-Z_.]+}}, align 16
 
 void __attribute__((noinline))
 test_setzero() {
@@ -1642,10 +848,8 @@ test_setzero() {
 
 // CHECK-LABEL: @test_setzero
 
-// CHECK: define available_externally <4 x float> @_mm_setzero_ps
-// CHECK: store <4 x float> zeroinitializer, <4 x float>* [[REG827:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG828:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG827]], align 16
-// CHECK-NEXT: ret <4 x float> [[REG828]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_setzero_ps
+// CHECK: store <4 x float> zeroinitializer, <4 x float>* %{{[0-9a-zA-Z_.]+}}, align 16
 
 void __attribute__((noinline))
 test_sfence() {
@@ -1654,9 +858,8 @@ test_sfence() {
 
 // CHECK-LABEL: @test_sfence
 
-// CHECK: define available_externally void @_mm_sfence
+// CHECK-LABEL: define available_externally void @_mm_sfence
 // CHECK: fence release
-// CHECK-NEXT: ret void
 
 void __attribute__((noinline))
 test_shuffle() {
@@ -1667,132 +870,61 @@ test_shuffle() {
 
 // CHECK-LABEL: @test_shuffle
 
-// CHECK: define available_externally i64 @_mm_shuffle_pi16(i64 noundef [[REG829:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG830:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG829]], i64* [[REG831:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i32 [[REG830]], i32* [[REG832:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG833:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
-// CHECK-NEXT: [[REG834:[0-9a-zA-Z_%.]+]] = and i32 [[REG833]], 3
-// CHECK-NEXT: [[REG835:[0-9a-zA-Z_%.]+]] = sext i32 [[REG834]] to i64
-// CHECK-NEXT: store i64 [[REG835]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG836:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
-// CHECK-NEXT: [[REG837:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG836]], 2
-// CHECK-NEXT: [[REG838:[0-9a-zA-Z_%.]+]] = and i32 [[REG837]], 3
-// CHECK-NEXT: [[REG839:[0-9a-zA-Z_%.]+]] = sext i32 [[REG838]] to i64
-// CHECK-NEXT: store i64 [[REG839]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG840:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
-// CHECK-NEXT: [[REG841:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG840]], 4
-// CHECK-NEXT: [[REG842:[0-9a-zA-Z_%.]+]] = and i32 [[REG841]], 3
-// CHECK-NEXT: [[REG843:[0-9a-zA-Z_%.]+]] = sext i32 [[REG842]] to i64
-// CHECK-NEXT: store i64 [[REG843]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG844:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
-// CHECK-NEXT: [[REG845:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG844]], 6
-// CHECK-NEXT: [[REG846:[0-9a-zA-Z_%.]+]] = and i32 [[REG845]], 3
-// CHECK-NEXT: [[REG847:[0-9a-zA-Z_%.]+]] = sext i32 [[REG846]] to i64
-// CHECK-NEXT: store i64 [[REG847]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG848:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG849:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG848]]
-// CHECK-NEXT: [[REG850:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG849]], align 2
-// CHECK-NEXT: [[REG851:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
-// CHECK-LE-NEXT: [[REG852:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG851]], i64 0, i64 0
-// CHECK-BE-NEXT: [[REG852:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG851]], i64 0, i64 3
-// CHECK-NEXT: store i16 [[REG850]], i16* [[REG852]]
-// CHECK-NEXT: [[REG853:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG854:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG853]]
-// CHECK-NEXT: [[REG855:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG854]], align 2
-// CHECK-NEXT: [[REG856:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
-// CHECK-LE-NEXT: [[REG857:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG856]], i64 0, i64 1
-// CHECK-BE-NEXT: [[REG857:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG856]], i64 0, i64 2
-// CHECK-NEXT: store i16 [[REG855]], i16* [[REG857]]
-// CHECK-NEXT: [[REG858:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG859:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG858]]
-// CHECK-NEXT: [[REG860:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG859]], align 2
-// CHECK-NEXT: [[REG861:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
-// CHECK-LE-NEXT: [[REG862:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG861]], i64 0, i64 2
-// CHECK-BE-NEXT: [[REG862:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG861]], i64 0, i64 1
-// CHECK-NEXT: store i16 [[REG860]], i16* [[REG862]]
-// CHECK-NEXT: [[REG863:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG864:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG863]]
-// CHECK-NEXT: [[REG865:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG864]], align 2
-// CHECK-NEXT: [[REG866:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
-// CHECK-LE-NEXT: [[REG867:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG866]], i64 0, i64 3
-// CHECK-BE-NEXT: [[REG867:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG866]], i64 0, i64 0
-// CHECK-NEXT: store i16 [[REG865]], i16* [[REG867]]
-// CHECK-NEXT: [[REG868:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i64*
-// CHECK-NEXT: [[REG869:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG868]], align 8
-// CHECK-NEXT: [[REG870:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG869]])
-// CHECK-NEXT: store <2 x i64> [[REG870]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG871:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG831]], align 8
-// CHECK-NEXT: [[REG872:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 noundef [[REG871]])
-// CHECK-NEXT: store <2 x i64> [[REG872]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG873:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG874:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG875:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG876:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG875]] to <16 x i8>
-// CHECK-NEXT: [[REG877:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])(<2 x i64> noundef [[REG873]], <2 x i64> noundef [[REG874]], <16 x i8> noundef [[REG876]])
-// CHECK-NEXT: store <2 x i64> [[REG877]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG878:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG879:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG878]], i32 0
-// CHECK-NEXT: ret i64 [[REG879]]
-
-// CHECK: define available_externally <4 x float> @_mm_shuffle_ps(<4 x float> noundef [[REG880:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG881:[0-9a-zA-Z_%.]+]], i32 noundef signext [[REG882:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG880]], <4 x float>* [[REG883:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG881]], <4 x float>* [[REG884:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG882]], i32* [[REG885:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG886:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
-// CHECK-NEXT: [[REG887:[0-9a-zA-Z_%.]+]] = and i32 [[REG886]], 3
-// CHECK-NEXT: [[REG888:[0-9a-zA-Z_%.]+]] = sext i32 [[REG887]] to i64
-// CHECK-NEXT: store i64 [[REG888]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG889:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
-// CHECK-NEXT: [[REG890:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG889]], 2
-// CHECK-NEXT: [[REG891:[0-9a-zA-Z_%.]+]] = and i32 [[REG890]], 3
-// CHECK-NEXT: [[REG892:[0-9a-zA-Z_%.]+]] = sext i32 [[REG891]] to i64
-// CHECK-NEXT: store i64 [[REG892]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG893:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
-// CHECK-NEXT: [[REG894:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG893]], 4
-// CHECK-NEXT: [[REG895:[0-9a-zA-Z_%.]+]] = and i32 [[REG894]], 3
-// CHECK-NEXT: [[REG896:[0-9a-zA-Z_%.]+]] = sext i32 [[REG895]] to i64
-// CHECK-NEXT: store i64 [[REG896]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG897:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
-// CHECK-NEXT: [[REG898:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG897]], 6
-// CHECK-NEXT: [[REG899:[0-9a-zA-Z_%.]+]] = and i32 [[REG898]], 3
-// CHECK-NEXT: [[REG900:[0-9a-zA-Z_%.]+]] = sext i32 [[REG899]] to i64
-// CHECK-NEXT: store i64 [[REG900]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG901:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG902:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG901]]
-// CHECK-NEXT: [[REG903:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG902]], align 4
-// CHECK-NEXT: [[REG904:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG905:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG904]], i32 [[REG903]], i32 0
-// CHECK-NEXT: store <4 x i32> [[REG905]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG906:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG907:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG906]]
-// CHECK-NEXT: [[REG908:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG907]], align 4
-// CHECK-NEXT: [[REG909:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG910:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG909]], i32 [[REG908]], i32 1
-// CHECK-NEXT: store <4 x i32> [[REG910]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG911:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG912:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG911]]
-// CHECK-NEXT: [[REG913:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG912]], align 4
-// CHECK-NEXT: [[REG914:[0-9a-zA-Z_%.]+]] = add i32 [[REG913]], 269488144
-// CHECK-NEXT: [[REG915:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG916:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG915]], i32 [[REG914]], i32 2
-// CHECK-NEXT: store <4 x i32> [[REG916]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG917:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG918:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG917]]
-// CHECK-NEXT: [[REG919:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG918]], align 4
-// CHECK-NEXT: [[REG920:[0-9a-zA-Z_%.]+]] = add i32 [[REG919]], 269488144
-// CHECK-NEXT: [[REG921:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG922:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG921]], i32 [[REG920]], i32 3
-// CHECK-NEXT: store <4 x i32> [[REG922]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG923:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG924:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG925:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG926:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG925]] to <16 x i8>
-// CHECK-NEXT: [[REG927:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> noundef [[REG923]], <4 x float> noundef [[REG924]], <16 x i8> noundef [[REG926]])
-// CHECK-NEXT: ret <4 x float> [[REG927]]
-
-// CHECK: define available_externally i64 @_m_pshufw
-// CHECK: [[REG928:[0-9a-zA-Z_%.]+]] = call i64 @_mm_shuffle_pi16
-// CHECK-NEXT: ret i64 [[REG928]]
+// CHECK-LABEL: define available_externally i64 @_mm_shuffle_pi16
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
+// CHECK: sext i32 %[[AND]] to i64
+// CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2
+// CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
+// CHECK: sext i32 %[[AND2]] to i64
+// CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4
+// CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
+// CHECK: sext i32 %[[AND3]] to i64
+// CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
+// CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
+// CHECK: sext i32 %[[AND4]] to i64
+// CHECK: getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK-LE: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
+// CHECK-BE: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
+// CHECK: getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK-LE: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
+// CHECK-BE: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
+// CHECK: getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK-LE: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
+// CHECK-BE: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
+// CHECK: getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
+// CHECK-LE: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
+// CHECK-BE: getelementptr inbounds [4 x i16], [4 x i16]* %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_splats(unsigned long long)
+// CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
+// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_shuffle_ps
+// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
+// CHECK: sext i32 %[[AND]] to i64
+// CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2
+// CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
+// CHECK: sext i32 %[[AND2]] to i64
+// CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4
+// CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
+// CHECK: sext i32 %[[AND3]] to i64
+// CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
+// CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
+// CHECK: sext i32 %[[AND4]] to i64
+// CHECK: getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64
+// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64
+// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1
+// CHECK: getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64
+// CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
+// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2
+// CHECK: getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64
+// CHECK: %[[ADD2:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
+// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD2]], i32 3
+// CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])
+
+// CHECK-LABEL: define available_externally i64 @_m_pshufw
+// CHECK: call i64 @_mm_shuffle_pi16
 
 void __attribute__((noinline))
 test_sqrt() {
@@ -1802,21 +934,13 @@ test_sqrt() {
 
 // CHECK-LABEL: @test_sqrt
 
-// CHECK: define available_externally <4 x float> @_mm_sqrt_ps
-// CHECK: [[REG929:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sqrt(float vector[4])(<4 x float> noundef {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: ret <4 x float> [[REG929]]
-
-// CHECK: define available_externally <4 x float> @_mm_sqrt_ss
-// CHECK: [[REG930:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG931:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef [[REG930]], i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG931]], <4 x float>* [[REG932:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG933:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG932]], align 16
-// CHECK-NEXT: [[REG934:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sqrt(float vector[4])(<4 x float> noundef [[REG933]])
-// CHECK-NEXT: store <4 x float> [[REG934]], <4 x float>* [[REG935:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG936:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG937:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG935]], align 16
-// CHECK-NEXT: [[REG938:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef [[REG936]], <4 x float> noundef [[REG937]], <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG938]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_sqrt_ps
+// CHECK: call <4 x float> @vec_sqrt(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_sqrt_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: call <4 x float> @vec_sqrt(float vector[4])
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
 
 void __attribute__((noinline))
 test_store() {
@@ -1831,78 +955,31 @@ test_store() {
 
 // CHECK-LABEL: @test_store
 
-// CHECK: define available_externally void @_mm_store_ps
-// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG939:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG940:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG941:[0-9a-zA-Z_%.]+]] = bitcast float* [[REG940]] to <4 x float>*
-// CHECK-NEXT: call void @vec_st(float vector[4], long, float vector[4]*)(<4 x float> noundef [[REG939]], i64 noundef 0, <4 x float>* noundef [[REG941]])
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_store_ps1
-// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG942:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG943:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: call void @_mm_store1_ps(float* noundef [[REG942]], <4 x float> noundef [[REG943]])
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_store_ss
-// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG944:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG945:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG944]], i32 0
-// CHECK-NEXT: [[REG946:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store float [[REG945]], float* [[REG946]], align 4
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_store1_ps
-// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG947:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG948:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef [[REG947]], i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG948]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG949:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG950:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: call void @_mm_store_ps(float* noundef [[REG949]], <4 x float> noundef [[REG950]])
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_storeh_pi
-// CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG951:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG952:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG951]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG952]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG953:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG954:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG953]], i32 1
-// CHECK-NEXT: [[REG955:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 [[REG954]], i64* [[REG955]], align 8
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_storel_pi
-// CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG956:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG957:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG956]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG957]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG958:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG959:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG958]], i32 0
-// CHECK-NEXT: [[REG960:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 [[REG959]], i64* [[REG960]], align 8
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_storer_ps
-// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG961:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG962:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG963:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> noundef [[REG961]], <4 x float> noundef [[REG962]], <16 x i8> noundef <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
-// CHECK-NEXT: store <4 x float> [[REG963]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG964:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG965:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: call void @_mm_store_ps(float* noundef [[REG964]], <4 x float> noundef [[REG965]])
-// CHECK-NEXT: ret void
+// CHECK-LABEL: define available_externally void @_mm_store_ps
+// CHECK: call void @vec_st(float vector[4], long, float vector[4]*)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef 0, <4 x float>* noundef %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally void @_mm_store_ps1
+// CHECK: call void @_mm_store1_ps
+
+// CHECK-LABEL: define available_externally void @_mm_store_ss
+// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: store float %[[VAL]], float* %{{[0-9a-zA-Z_.]+}}, align 4
+
+// CHECK-LABEL: define available_externally void @_mm_store1_ps
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: call void @_mm_store_ps
+
+// CHECK-LABEL: define available_externally void @_mm_storeh_pi
+// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
+// CHECK: store i64 %[[VAL]], i64* %{{[0-9a-zA-Z_.]+}}, align 8
+
+// CHECK-LABEL: define available_externally void @_mm_storel_pi
+// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: store i64 %[[VAL]], i64* %{{[0-9a-zA-Z_.]+}}, align 8
+
+// CHECK-LABEL: define available_externally void @_mm_storer_ps
+// CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
+// CHECK: call void @_mm_store_ps
 
 void __attribute__((noinline))
 test_stream() {
@@ -1912,25 +989,12 @@ test_stream() {
 
 // CHECK-LABEL: @test_stream
 
-/// CHECK: define available_externally void @_mm_stream_pi
-// CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG966:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(i64* [[REG966]])
-// CHECK-NEXT: [[REG967:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG968:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 [[REG967]], i64* [[REG968]], align 8
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_stream_ps
-// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG969:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(float* [[REG969]])
-// CHECK-NEXT: [[REG970:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG971:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: call void @_mm_store_ps(float* noundef [[REG970]], <4 x float> noundef [[REG971]])
-// CHECK-NEXT: ret void
+// CHECK-LABEL: define available_externally void @_mm_stream_pi
+// CHECK: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(i64* %{{[0-9a-zA-Z_.]+}})
+
+// CHECK-LABEL: define available_externally void @_mm_stream_ps
+// CHECK: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(float* %{{[0-9a-zA-Z_.]+}})
+// CHECK: call void @_mm_store_ps
 
 void __attribute__((noinline))
 test_sub() {
@@ -1940,31 +1004,14 @@ test_sub() {
 
 // CHECK-LABEL: @test_sub
 
-// CHECK: define available_externally <4 x float> @_mm_sub_ps(<4 x float> noundef [[REG972:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG973:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG972]], <4 x float>* [[REG974:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG973]], <4 x float>* [[REG975:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG976:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG974]], align 16
-// CHECK-NEXT: [[REG977:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG975]], align 16
-// CHECK-NEXT: [[REG978:[0-9a-zA-Z_%.]+]] = fsub <4 x float> [[REG976]], [[REG977]]
-// CHECK-NEXT: ret <4 x float> [[REG978]]
-
-// CHECK: define available_externally <4 x float> @_mm_sub_ss(<4 x float> noundef [[REG979:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG980:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG979]], <4 x float>* [[REG981:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG980]], <4 x float>* [[REG982:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG983:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG981]], align 16
-// CHECK-NEXT: [[REG984:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef [[REG983]], i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG984]], <4 x float>* [[REG985:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG986:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG982]], align 16
-// CHECK-NEXT: [[REG987:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef [[REG986]], i32 noundef zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG987]], <4 x float>* [[REG988:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG989:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG985:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG990:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG988:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG991:[0-9a-zA-Z_%.]+]] = fsub <4 x float> [[REG989]], [[REG990]]
-// CHECK-NEXT: store <4 x float> [[REG991]], <4 x float>* [[REG992:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG993:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG981]], align 16
-// CHECK-NEXT: [[REG994:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG992]], align 16
-// CHECK-NEXT: [[REG995:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef [[REG993]], <4 x float> noundef [[REG994]], <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG995]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_sub_ps
+// CHECK: fsub <4 x float>
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_sub_ss
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
+// CHECK: fsub <4 x float>
+// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
 
 void __attribute__((noinline))
 test_transpose() {
@@ -1974,17 +1021,14 @@ test_transpose() {
 
 // CHECK-LABEL: @test_transpose
 
-// CHECK: br label %[[REG996:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG996]]:
-// CHECK: [[REG997:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
-// CHECK: [[REG998:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
-// CHECK: [[REG999:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
-// CHECK: [[REG1000:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
-// CHECK: [[REG1001:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
-// CHECK: [[REG1002:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
-// CHECK: [[REG1003:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
-// CHECK: [[REG1004:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
-// CHECK: ret void
+// CHECK: call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
+// CHECK: call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
+// CHECK: call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
+// CHECK: call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
+// CHECK: call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
+// CHECK: call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
 
 void __attribute__((noinline))
 test_ucomi() {
@@ -1998,71 +1042,35 @@ test_ucomi() {
 
 // CHECK-LABEL: @test_ucomi
 
-// CHECK: define available_externally signext i32 @_mm_ucomieq_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1005:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1006:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1005]], i32 0
-// CHECK-NEXT: [[REG1007:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1008:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1007]], i32 0
-// CHECK-NEXT: [[REG1009:[0-9a-zA-Z_%.]+]] = fcmp oeq float [[REG1006]], [[REG1008]]
-// CHECK-NEXT: [[REG1010:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1009]] to i32
-// CHECK-NEXT: ret i32 [[REG1010]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomige_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1011:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1012:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1011]], i32 0
-// CHECK-NEXT: [[REG1013:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1014:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1013]], i32 0
-// CHECK-NEXT: [[REG1015:[0-9a-zA-Z_%.]+]] = fcmp oge float [[REG1012]], [[REG1014]]
-// CHECK-NEXT: [[REG1016:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1015]] to i32
-// CHECK-NEXT: ret i32 [[REG1016]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomigt_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1017:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1018:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1017]], i32 0
-// CHECK-NEXT: [[REG1019:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1020:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1019]], i32 0
-// CHECK-NEXT: [[REG1021:[0-9a-zA-Z_%.]+]] = fcmp ogt float [[REG1018]], [[REG1020]]
-// CHECK-NEXT: [[REG1022:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1021]] to i32
-// CHECK-NEXT: ret i32 [[REG1022]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomile_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1023:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1024:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1023]], i32 0
-// CHECK-NEXT: [[REG1025:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1026:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1025]], i32 0
-// CHECK-NEXT: [[REG1027:[0-9a-zA-Z_%.]+]] = fcmp ole float [[REG1024]], [[REG1026]]
-// CHECK-NEXT: [[REG1028:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1027]] to i32
-// CHECK-NEXT: ret i32 [[REG1028]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomilt_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1029:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1030:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1029]], i32 0
-// CHECK-NEXT: [[REG1031:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1032:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1031]], i32 0
-// CHECK-NEXT: [[REG1033:[0-9a-zA-Z_%.]+]] = fcmp olt float [[REG1030]], [[REG1032]]
-// CHECK-NEXT: [[REG1034:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1033]] to i32
-// CHECK-NEXT: ret i32 [[REG1034]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomineq_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1035:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1036:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1035]], i32 0
-// CHECK-NEXT: [[REG1037:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1038:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1037]], i32 0
-// CHECK-NEXT: [[REG1039:[0-9a-zA-Z_%.]+]] = fcmp une float [[REG1036]], [[REG1038]]
-// CHECK-NEXT: [[REG1040:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1039]] to i32
-// CHECK-NEXT: ret i32 [[REG1040]]
+// CHECK-LABEL: define available_externally signext i32 @_mm_ucomieq_ss
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: fcmp oeq float %[[VAL1]], %[[VAL2]]
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_ucomige_ss
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: fcmp oge float %[[VAL1]], %[[VAL2]]
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_ucomigt_ss
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: fcmp ogt float %[[VAL1]], %[[VAL2]]
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_ucomile_ss
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: fcmp ole float %[[VAL1]], %[[VAL2]]
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_ucomilt_ss
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: fcmp olt float %[[VAL1]], %[[VAL2]]
+
+// CHECK-LABEL: define available_externally signext i32 @_mm_ucomineq_ss
+// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
+// CHECK: fcmp une float %[[VAL1]], %[[VAL2]]
 
 void __attribute__((noinline))
 test_undefined() {
@@ -2071,12 +1079,10 @@ test_undefined() {
 
 // CHECK-LABEL: @test_undefined
 
-// CHECK: define available_externally <4 x float> @_mm_undefined_ps
-// CHECK: [[REG1041:[0-9a-zA-Z_%.]+]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[REG1042:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1041]], align 16
-// CHECK-NEXT: store <4 x float> [[REG1042]], <4 x float>* [[REG1041]], align 16
-// CHECK-NEXT: [[REG1043:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1041]], align 16
-// CHECK-NEXT: ret <4 x float> [[REG1043]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_undefined_ps
+// CHECK: alloca <4 x float>, align 16
+// CHECK: load <4 x float>, <4 x float>* %[[ADDR:[0-9a-zA-Z_.]+]], align 16
+// CHECK: load <4 x float>, <4 x float>* %[[ADDR]], align 16
 
 void __attribute__((noinline))
 test_unpack() {
@@ -2086,18 +1092,8 @@ test_unpack() {
 
 // CHECK-LABEL: @test_unpack
 
-// CHECK: define available_externally <4 x float> @_mm_unpackhi_ps(<4 x float> noundef [[REG1044:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG1045:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG1044]], <4 x float>* [[REG1046:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG1045]], <4 x float>* [[REG1047:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1048:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1046]], align 16
-// CHECK-NEXT: [[REG1049:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1047]], align 16
-// CHECK-NEXT: [[REG1050:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])(<4 x float> noundef [[REG1048]], <4 x float> noundef [[REG1049]])
-// CHECK-NEXT: ret <4 x float> [[REG1050]]
-
-// CHECK: define available_externally <4 x float> @_mm_unpacklo_ps(<4 x float> noundef [[REG1051:[0-9a-zA-Z_%.]+]], <4 x float> noundef [[REG1052:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG1051]], <4 x float>* [[REG1053:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG1052]], <4 x float>* [[REG1054:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1055:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1053]], align 16
-// CHECK-NEXT: [[REG1056:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1054]], align 16
-// CHECK-NEXT: [[REG1057:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])(<4 x float> noundef [[REG1055]], <4 x float> noundef [[REG1056]])
-// CHECK-NEXT: ret <4 x float> [[REG1057]]
+// CHECK-LABEL: define available_externally <4 x float> @_mm_unpackhi_ps
+// CHECK: call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
+
+// CHECK-LABEL: define available_externally <4 x float> @_mm_unpacklo_ps
+// CHECK: call <4 x float> @vec_vmrghw(float vector[4], float vector[4])


        


More information about the cfe-commits mailing list