[llvm] 1580f4b - [AArch64] Update costs for fshl/r and add rotr/l variants. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 24 06:42:43 PST 2025


Author: David Green
Date: 2025-11-24T14:42:37Z
New Revision: 1580f4b038c9945bf73d33b25459bece2f67ace7

URL: https://github.com/llvm/llvm-project/commit/1580f4b038c9945bf73d33b25459bece2f67ace7
DIFF: https://github.com/llvm/llvm-project/commit/1580f4b038c9945bf73d33b25459bece2f67ace7.diff

LOG: [AArch64] Update costs for fshl/r and add rotr/l variants. NFC

Added: 
    

Modified: 
    llvm/test/Analysis/CostModel/AArch64/fshl.ll
    llvm/test/Analysis/CostModel/AArch64/fshr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
index 9d06b4bdec9b4..cd6068d382169 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
@@ -5,277 +5,544 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define i8 @fshl_i8_3rd_arg_const(i8 %a, i8 %b) {
 ; CHECK-LABEL: 'fshl_i8_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r
 ;
 entry:
-  %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9)
-  ret i8 %fshl
+  %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 9)
+  ret i8 %r
 }
 
 define i8 @fshl_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) {
 ; CHECK-LABEL: 'fshl_i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r
 ;
 entry:
-  %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
-  ret i8 %fshl
+  %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
+  ret i8 %r
 }
 
-declare i8 @llvm.fshl.i8(i8, i8, i8)
-
-define i16 @fshl_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: 'fshl_i16'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %fshl
+define i16 @fshl_i16_3rd_arg_const(i16 %a, i16 %b) {
+; CHECK-LABEL: 'fshl_i16_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r
 ;
 entry:
-  %fshl = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9)
-  ret i16 %fshl
+  %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 9)
+  ret i16 %r
 }
 
-declare i16 @llvm.fshl.i16(i16, i16, i16)
+define i16 @fshl_i16_3rd_arg_var(i16 %a, i16 %b, i16 %c) {
+; CHECK-LABEL: 'fshl_i16_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r
+;
+entry:
+  %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %b, i16 %c)
+  ret i16 %r
+}
 
 define i32 @fshl_i32_3rd_arg_const(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'fshl_i32_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
 ;
 entry:
-  %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9)
-  ret i32 %fshl
+  %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 9)
+  ret i32 %r
 }
 
 define i32 @fshl_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: 'fshl_i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
 ;
 entry:
-  %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-  ret i32 %fshl
+  %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %r
 }
 
-declare i32 @llvm.fshl.i32(i32, i32, i32)
-
 define i64 @fshl_i64_3rd_arg_const(i64 %a, i64 %b) {
 ; CHECK-LABEL: 'fshl_i64_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
 ;
 entry:
-  %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9)
-  ret i64 %fshl
+  %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 9)
+  ret i64 %r
 }
 
 define i64 @fshl_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: 'fshl_i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
+;
+entry:
+  %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
+  ret i64 %r
+}
+
+define i128 @fshl_i128_3rd_arg_const(i128 %a, i128 %b) {
+; CHECK-LABEL: 'fshl_i128_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r
 ;
 entry:
-  %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
-  ret i64 %fshl
+  %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9)
+  ret i128 %r
 }
 
-declare i64 @llvm.fshl.i64(i64, i64, i64)
+define i128 @fshl_i128_3rd_arg_var(i128 %a, i128 %b, i128 %c) {
+; CHECK-LABEL: 'fshl_i128_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:9 Lat:9 SizeLat:9 for: %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r
+;
+entry:
+  %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 %c)
+  ret i128 %r
+}
 
 define i19 @fshl_i19(i19 %a, i19 %b) {
 ; CHECK-LABEL: 'fshl_i19'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i19 %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i19 %r
 ;
 entry:
-  %fshl = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9)
-  ret i19 %fshl
+  %r = tail call i19 @llvm.fshl.i19(i19 %a, i19 %b, i19 9)
+  ret i19 %r
 }
 
-declare i19 @llvm.fshl.i19(i19, i19, i19)
+define i66 @fshl_i66(i66 %a, i66 %b) {
+; CHECK-LABEL: 'fshl_i66'
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %r = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i66 %r
+;
+entry:
+  %r = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9)
+  ret i66 %r
+}
 
 
 define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r
 ;
 entry:
-  %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-  ret <16 x i8> %fshl
+  %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+  ret <16 x i8> %r
 }
 
 define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_lanes_different(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r
 ;
 entry:
-  %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
-  ret <16 x i8> %fshl
+  %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
+  ret <16 x i8> %r
 }
 
 define <16 x i8> @fshl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
 ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r
 ;
 entry:
-  %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
-  ret <16 x i8> %fshl
+  %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+  ret <16 x i8> %r
 }
 
-declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
-
 define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r
 ;
 entry:
-  %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-  ret <8 x i16> %fshl
+  %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  ret <8 x i16> %r
 }
 
 define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_lanes_different(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r
 ;
 entry:
-  %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
-  ret <8 x i16> %fshl
+  %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
+  ret <8 x i16> %r
 }
 
 define <8 x i16> @fshl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
 ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r
 ;
 entry:
-  %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
-  ret <8 x i16> %fshl
+  %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+  ret <8 x i16> %r
 }
 
-declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
-
 define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r
 ;
 entry:
-  %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
-  ret <4 x i32> %fshl
+  %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+  ret <4 x i32> %r
 }
 
 define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_lanes_different(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r
 ;
 entry:
-  %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
-  ret <4 x i32> %fshl
+  %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
+  ret <4 x i32> %r
 }
 
 define <4 x i32> @fshl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r
 ;
 entry:
-  %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
-  ret <4 x i32> %fshl
+  %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %r
 }
 
-declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-
 define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r
 ;
 entry:
-  %fshl = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
-  ret <2 x i64> %fshl
+  %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
+  ret <2 x i64> %r
 }
 
 define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_lanes_different(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r
 ;
 entry:
-  %fshl = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
-  ret <2 x i64> %fshl
+  %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
+  ret <2 x i64> %r
 }
 
 define <2 x i64> @fshl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r
 ;
 entry:
-  %fshl = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
-  ret <2 x i64> %fshl
+  %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %r
 }
 
-declare <2 x i64> @llvm.fshl.v4i64(<2 x i64>, <2 x i64>, <2 x i64>)
-
 define <4 x i30> @fshl_v4i30_3rd_arg_var(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) {
 ; CHECK-LABEL: 'fshl_v4i30_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %r = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %r
 ;
 entry:
-  %fshl = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
-  ret <4 x i30> %fshl
+  %r = tail call <4 x i30> @llvm.fshl.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
+  ret <4 x i30> %r
 }
 
-declare <4 x i30> @llvm.fshl.v4i30(<4 x i30>, <4 x i30>, <4 x i30>)
-
 define <2 x i66> @fshl_v2i66_3rd_arg_vec_const_lanes_different(<2 x i66> %a, <2 x i66> %b) {
 ; CHECK-LABEL: 'fshl_v2i66_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshl = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %r
 ;
 entry:
-  %fshl = tail call <2 x i66> @llvm.fshl.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
-  ret <2 x i66> %fshl
+  %r = tail call <2 x i66> @llvm.fshl.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
+  ret <2 x i66> %r
 }
-declare <2 x i66> @llvm.fshl.v4i66(<2 x i66>, <2 x i66>, <2 x i66>)
 
-define i66 @fshl_i66(i66 %a, i66 %b) {
-; CHECK-LABEL: 'fshl_i66'
-; CHECK-NEXT:  Cost Model: Found costs of 3 for: %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i66 %fshl
+define <2 x i128> @fshl_v2i128_3rd_arg_vec_const_all_lanes_same(<2 x i128> %a, <2 x i128> %b) {
+; CHECK-LABEL: 'fshl_v2i128_3rd_arg_vec_const_all_lanes_same'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> splat (i128 1))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r
 ;
 entry:
-  %fshl = tail call i66 @llvm.fshl.i66(i66 %a, i66 %b, i66 9)
-  ret i66 %fshl
+  %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 1>)
+  ret <2 x i128> %r
 }
 
-declare i66 @llvm.fshl.i66(i66, i66, i66)
-
 define <2 x i128> @fshl_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a, <2 x i128> %b) {
 ; CHECK-LABEL: 'fshl_v2i128_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshl = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %fshl
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r
+;
+entry:
+  %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
+  ret <2 x i128> %r
+}
+
+define <2 x i128> @fshl_v2i128_3rd_arg_var(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) {
+; CHECK-LABEL: 'fshl_v2i128_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:17 Lat:21 SizeLat:21 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r
+;
+entry:
+  %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c)
+  ret <2 x i128> %r
+}
+
+
+; Rotate tests
+
+define i8 @rotl_i8_3rd_arg_const(i8 %a) {
+; CHECK-LABEL: 'rotl_i8_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %a, i8 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r
+;
+entry:
+  %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %a, i8 9)
+  ret i8 %r
+}
+
+define i8 @rotl_i8_3rd_arg_var(i8 %a, i8 %c) {
+; CHECK-LABEL: 'rotl_i8_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %a, i8 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r
+;
+entry:
+  %r = tail call i8 @llvm.fshl.i8(i8 %a, i8 %a, i8 %c)
+  ret i8 %r
+}
+
+define i16 @rotl_i16_3rd_arg_const(i16 %a) {
+; CHECK-LABEL: 'rotl_i16_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r
+;
+entry:
+  %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 9)
+  ret i16 %r
+}
+
+define i16 @rotl_i16_3rd_arg_var(i16 %a, i16 %c) {
+; CHECK-LABEL: 'rotl_i16_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r
+;
+entry:
+  %r = tail call i16 @llvm.fshl.i16(i16 %a, i16 %a, i16 %c)
+  ret i16 %r
+}
+
+define i32 @rotl_i32_3rd_arg_const(i32 %a) {
+; CHECK-LABEL: 'rotl_i32_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
+;
+entry:
+  %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 9)
+  ret i32 %r
+}
+
+define i32 @rotl_i32_3rd_arg_var(i32 %a, i32 %c) {
+; CHECK-LABEL: 'rotl_i32_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
+;
+entry:
+  %r = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %c)
+  ret i32 %r
+}
+
+define i64 @rotl_i64_3rd_arg_const(i64 %a) {
+; CHECK-LABEL: 'rotl_i64_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
+;
+entry:
+  %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 9)
+  ret i64 %r
+}
+
+define i64 @rotl_i64_3rd_arg_var(i64 %a, i64 %c) {
+; CHECK-LABEL: 'rotl_i64_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
 ;
 entry:
-  %fshl = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
-  ret <2 x i128> %fshl
+  %r = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %c)
+  ret i64 %r
 }
-declare <2 x i128> @llvm.fshl.v4i128(<2 x i128>, <2 x i128>, <2 x i128>)
 
-define i128 @fshl_i128(i128 %a, i128 %b) {
-; CHECK-LABEL: 'fshl_i128'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %fshl
+define i128 @rotl_i128_3rd_arg_const(i128 %a) {
+; CHECK-LABEL: 'rotl_i128_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %a, i128 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r
 ;
 entry:
-  %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9)
-  ret i128 %fshl
+  %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %a, i128 9)
+  ret i128 %r
 }
 
-declare i128 @llvm.fshl.i128(i128, i128, i128)
+define i128 @rotl_i128_3rd_arg_var(i128 %a, i128 %c) {
+; CHECK-LABEL: 'rotl_i128_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %a, i128 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r
+;
+entry:
+  %r = tail call i128 @llvm.fshl.i128(i128 %a, i128 %a, i128 %c)
+  ret i128 %r
+}
+
+define <16 x i8> @rotl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a) {
+; CHECK-LABEL: 'rotl_v16i8_3rd_arg_vec_const_all_lanes_same'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> splat (i8 3))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r
+;
+entry:
+  %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+  ret <16 x i8> %r
+}
+
+define <16 x i8> @rotl_v16i8_3rd_arg_vec_const_lanes_different(<16 x i8> %a) {
+; CHECK-LABEL: 'rotl_v16i8_3rd_arg_vec_const_lanes_different'
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r
+;
+entry:
+  %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
+  ret <16 x i8> %r
+}
+
+define <16 x i8> @rotl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %c) {
+; CHECK-LABEL: 'rotl_v16i8_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r
+;
+entry:
+  %r = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %c)
+  ret <16 x i8> %r
+}
+
+define <8 x i16> @rotl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a) {
+; CHECK-LABEL: 'rotl_v8i16_3rd_arg_vec_const_all_lanes_same'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> splat (i16 3))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r
+;
+entry:
+  %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  ret <8 x i16> %r
+}
+
+define <8 x i16> @rotl_v8i16_3rd_arg_vec_const_lanes_different(<8 x i16> %a) {
+; CHECK-LABEL: 'rotl_v8i16_3rd_arg_vec_const_lanes_different'
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r
+;
+entry:
+  %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
+  ret <8 x i16> %r
+}
+
+define <8 x i16> @rotl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %c) {
+; CHECK-LABEL: 'rotl_v8i16_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r
+;
+entry:
+  %r = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %c)
+  ret <8 x i16> %r
+}
+
+define <4 x i32> @rotl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a) {
+; CHECK-LABEL: 'rotl_v4i32_3rd_arg_vec_const_all_lanes_same'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> splat (i32 3))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r
+;
+entry:
+  %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @rotl_v4i32_3rd_arg_vec_const_lanes_different(<4 x i32> %a) {
+; CHECK-LABEL: 'rotl_v4i32_3rd_arg_vec_const_lanes_different'
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r
+;
+entry:
+  %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @rotl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %c) {
+; CHECK-LABEL: 'rotl_v4i32_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r
+;
+entry:
+  %r = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %c)
+  ret <4 x i32> %r
+}
+
+define <2 x i64> @rotl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a) {
+; CHECK-LABEL: 'rotl_v2i64_3rd_arg_vec_const_all_lanes_same'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> splat (i64 1))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r
+;
+entry:
+  %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> <i64 1, i64 1>)
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @rotl_v2i64_3rd_arg_vec_const_lanes_different(<2 x i64> %a) {
+; CHECK-LABEL: 'rotl_v2i64_3rd_arg_vec_const_lanes_different'
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> <i64 1, i64 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r
+;
+entry:
+  %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> <i64 1, i64 2>)
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @rotl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %c) {
+; CHECK-LABEL: 'rotl_v2i64_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r
+;
+entry:
+  %r = tail call <2 x i64> @llvm.fshl.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %c)
+  ret <2 x i64> %r
+}
+
+define <2 x i128> @rotl_v2i128_3rd_arg_vec_const_all_lanes_same(<2 x i128> %a) {
+; CHECK-LABEL: 'rotl_v2i128_3rd_arg_vec_const_all_lanes_same'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> splat (i128 1))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r
+;
+entry:
+  %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> <i128 1, i128 1>)
+  ret <2 x i128> %r
+}
+
+define <2 x i128> @rotl_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a) {
+; CHECK-LABEL: 'rotl_v2i128_3rd_arg_vec_const_lanes_different'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> <i128 1, i128 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r
+;
+entry:
+  %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> <i128 1, i128 2>)
+  ret <2 x i128> %r
+}
+
+define <2 x i128> @rotl_v2i128_3rd_arg_var(<2 x i128> %a, <2 x i128> %c) {
+; CHECK-LABEL: 'rotl_v2i128_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r
+;
+entry:
+  %r = tail call <2 x i128> @llvm.fshl.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> %c)
+  ret <2 x i128> %r
+}

diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
index b31806b647868..795371e9f3f68 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
@@ -5,277 +5,544 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define i8 @fshr_i8_3rd_arg_const(i8 %a, i8 %b) {
 ; CHECK-LABEL: 'fshr_i8_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r
 ;
 entry:
-  %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9)
-  ret i8 %fshr
+  %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 9)
+  ret i8 %r
 }
 
 define i8 @fshr_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) {
 ; CHECK-LABEL: 'fshr_i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r
 ;
 entry:
-  %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
-  ret i8 %fshr
+  %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
+  ret i8 %r
 }
 
-declare i8 @llvm.fshr.i8(i8, i8, i8)
-
-define i16 @fshr_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: 'fshr_i16'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %fshr
+define i16 @fshr_i16_3rd_arg_const(i16 %a, i16 %b) {
+; CHECK-LABEL: 'fshr_i16_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r
 ;
 entry:
-  %fshr = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9)
-  ret i16 %fshr
+  %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 9)
+  ret i16 %r
 }
 
-declare i16 @llvm.fshr.i16(i16, i16, i16)
+define i16 @fshr_i16_3rd_arg_var(i16 %a, i16 %b, i16 %c) {
+; CHECK-LABEL: 'fshr_i16_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r
+;
+entry:
+  %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %c)
+  ret i16 %r
+}
 
 define i32 @fshr_i32_3rd_arg_const(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'fshr_i32_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
 ;
 entry:
-  %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9)
-  ret i32 %fshr
+  %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 9)
+  ret i32 %r
 }
 
 define i32 @fshr_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: 'fshr_i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
 ;
 entry:
-  %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
-  ret i32 %fshr
+  %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %r
 }
 
-declare i32 @llvm.fshr.i32(i32, i32, i32)
-
 define i64 @fshr_i64_3rd_arg_const(i64 %a, i64 %b) {
 ; CHECK-LABEL: 'fshr_i64_3rd_arg_const'
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
 ;
 entry:
-  %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9)
-  ret i64 %fshr
+  %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 9)
+  ret i64 %r
 }
 
 define i64 @fshr_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: 'fshr_i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
+;
+entry:
+  %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
+  ret i64 %r
+}
+
+define i128 @fshr_i128_3rd_arg_const(i128 %a, i128 %b) {
+; CHECK-LABEL: 'fshr_i128_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r
 ;
 entry:
-  %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
-  ret i64 %fshr
+  %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9)
+  ret i128 %r
 }
 
-declare i64 @llvm.fshr.i64(i64, i64, i64)
+define i128 @fshr_i128_3rd_arg_var(i128 %a, i128 %b, i128 %c) {
+; CHECK-LABEL: 'fshr_i128_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:9 Lat:9 SizeLat:9 for: %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r
+;
+entry:
+  %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 %c)
+  ret i128 %r
+}
 
 define i19 @fshr_i19(i19 %a, i19 %b) {
 ; CHECK-LABEL: 'fshr_i19'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i19 %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i19 %r
 ;
 entry:
-  %fshr = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9)
-  ret i19 %fshr
+  %r = tail call i19 @llvm.fshr.i19(i19 %a, i19 %b, i19 9)
+  ret i19 %r
 }
 
-declare i19 @llvm.fshr.i19(i19, i19, i19)
+define i66 @fshr_i66(i66 %a, i66 %b) {
+; CHECK-LABEL: 'fshr_i66'
+; CHECK-NEXT:  Cost Model: Found costs of 3 for: %r = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i66 %r
+;
+entry:
+  %r = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9)
+  ret i66 %r
+}
 
 
 define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r
 ;
 entry:
-  %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-  ret <16 x i8> %fshr
+  %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+  ret <16 x i8> %r
 }
 
 define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_lanes_different(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r
 ;
 entry:
-  %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
-  ret <16 x i8> %fshr
+  %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
+  ret <16 x i8> %r
 }
 
 define <16 x i8> @fshr_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
 ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r
 ;
 entry:
-  %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
-  ret <16 x i8> %fshr
+  %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+  ret <16 x i8> %r
 }
 
-declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
-
 define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r
 ;
 entry:
-  %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-  ret <8 x i16> %fshr
+  %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  ret <8 x i16> %r
 }
 
 define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_lanes_different(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r
 ;
 entry:
-  %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
-  ret <8 x i16> %fshr
+  %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
+  ret <8 x i16> %r
 }
 
 define <8 x i16> @fshr_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
 ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r
 ;
 entry:
-  %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
-  ret <8 x i16> %fshr
+  %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+  ret <8 x i16> %r
 }
 
-declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
-
 define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r
 ;
 entry:
-  %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
-  ret <4 x i32> %fshr
+  %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+  ret <4 x i32> %r
 }
 
 define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_lanes_different(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r
 ;
 entry:
-  %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
-  ret <4 x i32> %fshr
+  %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
+  ret <4 x i32> %r
 }
 
 define <4 x i32> @fshr_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r
 ;
 entry:
-  %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
-  ret <4 x i32> %fshr
+  %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %r
 }
 
-declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-
 define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r
 ;
 entry:
-  %fshr = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
-  ret <2 x i64> %fshr
+  %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
+  ret <2 x i64> %r
 }
 
 define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_lanes_different(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 6 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r
 ;
 entry:
-  %fshr = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
-  ret <2 x i64> %fshr
+  %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 2>)
+  ret <2 x i64> %r
 }
 
 define <2 x i64> @fshr_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of 7 for: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of 7 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r
 ;
 entry:
-  %fshr = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
-  ret <2 x i64> %fshr
+  %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %r
 }
 
-declare <2 x i64> @llvm.fshr.v4i64(<2 x i64>, <2 x i64>, <2 x i64>)
-
 define <4 x i30> @fshr_v4i30_3rd_arg_var(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c) {
 ; CHECK-LABEL: 'fshr_v4i30_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %fshr = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %r = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i30> %r
 ;
 entry:
-  %fshr = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
-  ret <4 x i30> %fshr
+  %r = tail call <4 x i30> @llvm.fshr.v4i30(<4 x i30> %a, <4 x i30> %b, <4 x i30> %c)
+  ret <4 x i30> %r
 }
 
-declare <4 x i30> @llvm.fshr.v4i30(<4 x i30>, <4 x i30>, <4 x i30>)
-
 define <2 x i66> @fshr_v2i66_3rd_arg_vec_const_lanes_different(<2 x i66> %a, <2 x i66> %b) {
 ; CHECK-LABEL: 'fshr_v2i66_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshr = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %r
 ;
 entry:
-  %fshr = tail call <2 x i66> @llvm.fshr.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
-  ret <2 x i66> %fshr
+  %r = tail call <2 x i66> @llvm.fshr.v4i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
+  ret <2 x i66> %r
 }
-declare <2 x i66> @llvm.fshr.v4i66(<2 x i66>, <2 x i66>, <2 x i66>)
 
-define i66 @fshr_i66(i66 %a, i66 %b) {
-; CHECK-LABEL: 'fshr_i66'
-; CHECK-NEXT:  Cost Model: Found costs of 3 for: %fshr = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i66 %fshr
+define <2 x i128> @fshr_v2i128_3rd_arg_vec_const_all_lanes_same(<2 x i128> %a, <2 x i128> %b) {
+; CHECK-LABEL: 'fshr_v2i128_3rd_arg_vec_const_all_lanes_same'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> splat (i128 1))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r
 ;
 entry:
-  %fshr = tail call i66 @llvm.fshr.i66(i66 %a, i66 %b, i66 9)
-  ret i66 %fshr
+  %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 1>)
+  ret <2 x i128> %r
 }
 
-declare i66 @llvm.fshr.i66(i66, i66, i66)
-
 define <2 x i128> @fshr_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a, <2 x i128> %b) {
 ; CHECK-LABEL: 'fshr_v2i128_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshr = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %fshr
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r
+;
+entry:
+  %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
+  ret <2 x i128> %r
+}
+
+define <2 x i128> @fshr_v2i128_3rd_arg_var(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c) {
+; CHECK-LABEL: 'fshr_v2i128_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:17 Lat:21 SizeLat:21 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r
+;
+entry:
+  %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %c)
+  ret <2 x i128> %r
+}
+
+
+; Rotate tests
+
+define i8 @rotl_i8_3rd_arg_const(i8 %a) {
+; CHECK-LABEL: 'rotl_i8_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r
+;
+entry:
+  %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 9)
+  ret i8 %r
+}
+
+define i8 @rotl_i8_3rd_arg_var(i8 %a, i8 %c) {
+; CHECK-LABEL: 'rotl_i8_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i8 %r
+;
+entry:
+  %r = tail call i8 @llvm.fshr.i8(i8 %a, i8 %a, i8 %c)
+  ret i8 %r
+}
+
+define i16 @rotl_i16_3rd_arg_const(i16 %a) {
+; CHECK-LABEL: 'rotl_i16_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r
+;
+entry:
+  %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 9)
+  ret i16 %r
+}
+
+define i16 @rotl_i16_3rd_arg_var(i16 %a, i16 %c) {
+; CHECK-LABEL: 'rotl_i16_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i16 %r
+;
+entry:
+  %r = tail call i16 @llvm.fshr.i16(i16 %a, i16 %a, i16 %c)
+  ret i16 %r
+}
+
+define i32 @rotl_i32_3rd_arg_const(i32 %a) {
+; CHECK-LABEL: 'rotl_i32_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
+;
+entry:
+  %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 9)
+  ret i32 %r
+}
+
+define i32 @rotl_i32_3rd_arg_var(i32 %a, i32 %c) {
+; CHECK-LABEL: 'rotl_i32_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 %r
+;
+entry:
+  %r = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %c)
+  ret i32 %r
+}
+
+define i64 @rotl_i64_3rd_arg_const(i64 %a) {
+; CHECK-LABEL: 'rotl_i64_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
+;
+entry:
+  %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 9)
+  ret i64 %r
+}
+
+define i64 @rotl_i64_3rd_arg_var(i64 %a, i64 %c) {
+; CHECK-LABEL: 'rotl_i64_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i64 %r
 ;
 entry:
-  %fshr = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
-  ret <2 x i128> %fshr
+  %r = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %c)
+  ret i64 %r
 }
-declare <2 x i128> @llvm.fshr.v4i128(<2 x i128>, <2 x i128>, <2 x i128>)
 
-define i128 @fshr_i128(i128 %a, i128 %b) {
-; CHECK-LABEL: 'fshr_i128'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %fshr
+define i128 @rotl_i128_3rd_arg_const(i128 %a) {
+; CHECK-LABEL: 'rotl_i128_3rd_arg_const'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r
 ;
 entry:
-  %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9)
-  ret i128 %fshr
+  %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 9)
+  ret i128 %r
 }
 
-declare i128 @llvm.fshr.i128(i128, i128, i128)
+define i128 @rotl_i128_3rd_arg_var(i128 %a, i128 %c) {
+; CHECK-LABEL: 'rotl_i128_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %r
+;
+entry:
+  %r = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %c)
+  ret i128 %r
+}
+
+define <16 x i8> @rotl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a) {
+; CHECK-LABEL: 'rotl_v16i8_3rd_arg_vec_const_all_lanes_same'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> splat (i8 3))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r
+;
+entry:
+  %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+  ret <16 x i8> %r
+}
+
+define <16 x i8> @rotl_v16i8_3rd_arg_vec_const_lanes_different(<16 x i8> %a) {
+; CHECK-LABEL: 'rotl_v16i8_3rd_arg_vec_const_lanes_different'
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r
+;
+entry:
+  %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 9, i8 1, i8 13, i8 7, i8 31, i8 23, i8 43, i8 51, i8 3, i8 3, i8 17, i8 3, i8 11, i8 15, i8 3, i8 3>)
+  ret <16 x i8> %r
+}
+
+define <16 x i8> @rotl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %c) {
+; CHECK-LABEL: 'rotl_v16i8_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %r
+;
+entry:
+  %r = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> %c)
+  ret <16 x i8> %r
+}
+
+define <8 x i16> @rotl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a) {
+; CHECK-LABEL: 'rotl_v8i16_3rd_arg_vec_const_all_lanes_same'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> splat (i16 3))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r
+;
+entry:
+  %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  ret <8 x i16> %r
+}
+
+define <8 x i16> @rotl_v8i16_3rd_arg_vec_const_lanes_different(<8 x i16> %a) {
+; CHECK-LABEL: 'rotl_v8i16_3rd_arg_vec_const_lanes_different'
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r
+;
+entry:
+  %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 3, i16 1, i16 13, i16 8, i16 7, i16 31, i16 43, i16 51>)
+  ret <8 x i16> %r
+}
+
+define <8 x i16> @rotl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %c) {
+; CHECK-LABEL: 'rotl_v8i16_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %r
+;
+entry:
+  %r = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> %c)
+  ret <8 x i16> %r
+}
+
+define <4 x i32> @rotl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a) {
+; CHECK-LABEL: 'rotl_v4i32_3rd_arg_vec_const_all_lanes_same'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> splat (i32 3))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r
+;
+entry:
+  %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @rotl_v4i32_3rd_arg_vec_const_lanes_different(<4 x i32> %a) {
+; CHECK-LABEL: 'rotl_v4i32_3rd_arg_vec_const_lanes_different'
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r
+;
+entry:
+  %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 3, i32 11, i32 2>)
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @rotl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %c) {
+; CHECK-LABEL: 'rotl_v4i32_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %r
+;
+entry:
+  %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> %c)
+  ret <4 x i32> %r
+}
+
+define <2 x i64> @rotl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a) {
+; CHECK-LABEL: 'rotl_v2i64_3rd_arg_vec_const_all_lanes_same'
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> splat (i64 1))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r
+;
+entry:
+  %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> <i64 1, i64 1>)
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @rotl_v2i64_3rd_arg_vec_const_lanes_different(<2 x i64> %a) {
+; CHECK-LABEL: 'rotl_v2i64_3rd_arg_vec_const_lanes_different'
+; CHECK-NEXT:  Cost Model: Found costs of 4 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> <i64 1, i64 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r
+;
+entry:
+  %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> <i64 1, i64 2>)
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @rotl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %c) {
+; CHECK-LABEL: 'rotl_v2i64_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of 5 for: %r = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %r
+;
+entry:
+  %r = tail call <2 x i64> @llvm.fshr.v4i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> %c)
+  ret <2 x i64> %r
+}
+
+define <2 x i128> @rotl_v2i128_3rd_arg_vec_const_all_lanes_same(<2 x i128> %a) {
+; CHECK-LABEL: 'rotl_v2i128_3rd_arg_vec_const_all_lanes_same'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> splat (i128 1))
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r
+;
+entry:
+  %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> <i128 1, i128 1>)
+  ret <2 x i128> %r
+}
+
+define <2 x i128> @rotl_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a) {
+; CHECK-LABEL: 'rotl_v2i128_3rd_arg_vec_const_lanes_different'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> <i128 1, i128 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r
+;
+entry:
+  %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> <i128 1, i128 2>)
+  ret <2 x i128> %r
+}
+
+define <2 x i128> @rotl_v2i128_3rd_arg_var(<2 x i128> %a, <2 x i128> %c) {
+; CHECK-LABEL: 'rotl_v2i128_3rd_arg_var'
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:5 Lat:5 SizeLat:5 for: %r = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> %c)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %r
+;
+entry:
+  %r = tail call <2 x i128> @llvm.fshr.v4i128(<2 x i128> %a, <2 x i128> %a, <2 x i128> %c)
+  ret <2 x i128> %r
+}


        


More information about the llvm-commits mailing list