[llvm] 1bd836f - [X86] Update AVX512 rotate intrinsic tests to avoid adds

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 6 09:06:08 PST 2022


Author: Simon Pilgrim
Date: 2022-03-06T17:05:44Z
New Revision: 1bd836fa1087dcd6b07ca043b9155089116f1f51

URL: https://github.com/llvm/llvm-project/commit/1bd836fa1087dcd6b07ca043b9155089116f1f51
DIFF: https://github.com/llvm/llvm-project/commit/1bd836fa1087dcd6b07ca043b9155089116f1f51.diff

LOG: [X86] Update AVX512 rotate intrinsic tests to avoid adds

As noticed in D119654, by adding the masked intrinsics' results together we can end up with the selects being canonicalized away from the intrinsics - this isn't what we want to test here, so replace the adds with an insertvalue chain into an aggregate instead to retain all the results.
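
Condensed from the diff below, the rewrite applied to each test boils down to the following (v16i32 case shown; the other tests differ only in element type and rotate amounts):

  ; Before: the adds let the selects behind the masked intrinsics fold away.
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4

  ; After: an insertvalue chain returns all three results, keeping each
  ; select (and thus each masked rotate) live through codegen.
  %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
  %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1
  %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2
  ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5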

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/avx512-rotate.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/X86/avx512-rotate.ll b/llvm/test/CodeGen/X86/avx512-rotate.ll
index be1fc5ee21eb1..fcf1a5c3e13c2 100644
--- a/llvm/test/CodeGen/X86/avx512-rotate.ll
+++ b/llvm/test/CodeGen/X86/avx512-rotate.ll
@@ -12,230 +12,238 @@ declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32
 
 ; Tests showing replacement of variable rotates with immediate splat versions.
 
-define <16 x i32> @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
+define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
 ; KNL-LABEL: test_splat_rol_v16i32:
 ; KNL:       # %bb.0:
+; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprold $6, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
-; KNL-NEXT:    vprold $7, %zmm0, %zmm0
-; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    vprold $5, %zmm0, %zmm3 {%k1}
+; KNL-NEXT:    vprold $6, %zmm0, %zmm1 {%k1} {z}
+; KNL-NEXT:    vprold $7, %zmm0, %zmm2
+; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_splat_rol_v16i32:
 ; SKX:       # %bb.0:
+; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; SKX-NEXT:    kmovd %edi, %k1
-; SKX-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprold $6, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vprold $7, %zmm0, %zmm0
-; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    vprold $5, %zmm0, %zmm3 {%k1}
+; SKX-NEXT:    vprold $6, %zmm0, %zmm1 {%k1} {z}
+; SKX-NEXT:    vprold $7, %zmm0, %zmm2
+; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; SKX-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
+  %res0 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
   %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>, <16 x i32> zeroinitializer, i16 %x2)
   %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32> %x1, i16 -1)
-  %res3 = add <16 x i32> %res, %res1
-  %res4 = add <16 x i32> %res3, %res2
-  ret <16 x i32> %res4
+  %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
+  %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res3, <16 x i32> %res1, 1
+  %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res4, <16 x i32> %res2, 2
+  ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
 }
 
-define <8 x i64>@test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
+define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
 ; KNL-LABEL: test_splat_rol_v8i64:
 ; KNL:       # %bb.0:
+; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprolq $6, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
-; KNL-NEXT:    vprolq $7, %zmm0, %zmm0
-; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    vprolq $5, %zmm0, %zmm3 {%k1}
+; KNL-NEXT:    vprolq $6, %zmm0, %zmm1 {%k1} {z}
+; KNL-NEXT:    vprolq $7, %zmm0, %zmm2
+; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_splat_rol_v8i64:
 ; SKX:       # %bb.0:
+; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; SKX-NEXT:    kmovd %edi, %k1
-; SKX-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprolq $6, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vprolq $7, %zmm0, %zmm0
-; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    vprolq $5, %zmm0, %zmm3 {%k1}
+; SKX-NEXT:    vprolq $6, %zmm0, %zmm1 {%k1} {z}
+; SKX-NEXT:    vprolq $7, %zmm0, %zmm2
+; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; SKX-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
+  %res0 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
   %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>, <8 x i64> zeroinitializer, i8 %x2)
   %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>, <8 x i64> %x1, i8 -1)
-  %res3 = add <8 x i64> %res, %res1
-  %res4 = add <8 x i64> %res3, %res2
-  ret <8 x i64> %res4
+  %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
+  %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res3, <8 x i64> %res1, 1
+  %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res4, <8 x i64> %res2, 2
+  ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
 }
 
-define <16 x i32> @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
+define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
 ; KNL-LABEL: test_splat_ror_v16i32:
 ; KNL:       # %bb.0:
+; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprord $6, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
-; KNL-NEXT:    vprord $7, %zmm0, %zmm0
-; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    vprord $5, %zmm0, %zmm3 {%k1}
+; KNL-NEXT:    vprord $6, %zmm0, %zmm1 {%k1} {z}
+; KNL-NEXT:    vprord $7, %zmm0, %zmm2
+; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_splat_ror_v16i32:
 ; SKX:       # %bb.0:
+; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; SKX-NEXT:    kmovd %edi, %k1
-; SKX-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprord $6, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vprord $7, %zmm0, %zmm0
-; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    vprord $5, %zmm0, %zmm3 {%k1}
+; SKX-NEXT:    vprord $6, %zmm0, %zmm1 {%k1} {z}
+; SKX-NEXT:    vprord $7, %zmm0, %zmm2
+; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; SKX-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
+  %res0 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
   %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>, <16 x i32> zeroinitializer, i16 %x2)
   %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32> %x1, i16 -1)
-  %res3 = add <16 x i32> %res, %res1
-  %res4 = add <16 x i32> %res3, %res2
-  ret <16 x i32> %res4
+  %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
+  %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res3, <16 x i32> %res1, 1
+  %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res4, <16 x i32> %res2, 2
+  ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
 }
 
-define <8 x i64>@test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
+define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
 ; KNL-LABEL: test_splat_ror_v8i64:
 ; KNL:       # %bb.0:
+; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprorq $6, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
-; KNL-NEXT:    vprorq $7, %zmm0, %zmm0
-; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    vprorq $5, %zmm0, %zmm3 {%k1}
+; KNL-NEXT:    vprorq $6, %zmm0, %zmm1 {%k1} {z}
+; KNL-NEXT:    vprorq $7, %zmm0, %zmm2
+; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_splat_ror_v8i64:
 ; SKX:       # %bb.0:
+; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; SKX-NEXT:    kmovd %edi, %k1
-; SKX-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprorq $6, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vprorq $7, %zmm0, %zmm0
-; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    vprorq $5, %zmm0, %zmm3 {%k1}
+; SKX-NEXT:    vprorq $6, %zmm0, %zmm1 {%k1} {z}
+; SKX-NEXT:    vprorq $7, %zmm0, %zmm2
+; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; SKX-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
+  %res0 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
   %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>, <8 x i64> zeroinitializer, i8 %x2)
   %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>, <8 x i64> %x1, i8 -1)
-  %res3 = add <8 x i64> %res, %res1
-  %res4 = add <8 x i64> %res3, %res2
-  ret <8 x i64> %res4
+  %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
+  %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res3, <8 x i64> %res1, 1
+  %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res4, <8 x i64> %res2, 2
+  ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
 }
 
 ; Tests showing replacement of out-of-bounds variable rotates with in-bounds immediate splat versions.
 
-define <16 x i32> @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
+define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
 ; KNL-LABEL: test_splat_bounds_rol_v16i32:
 ; KNL:       # %bb.0:
+; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
-; KNL-NEXT:    vprold $30, %zmm0, %zmm0
-; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    vprold $1, %zmm0, %zmm3 {%k1}
+; KNL-NEXT:    vprold $31, %zmm0, %zmm1 {%k1} {z}
+; KNL-NEXT:    vprold $30, %zmm0, %zmm2
+; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_splat_bounds_rol_v16i32:
 ; SKX:       # %bb.0:
+; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; SKX-NEXT:    kmovd %edi, %k1
-; SKX-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vprold $30, %zmm0, %zmm0
-; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    vprold $1, %zmm0, %zmm3 {%k1}
+; SKX-NEXT:    vprold $31, %zmm0, %zmm1 {%k1} {z}
+; SKX-NEXT:    vprold $30, %zmm0, %zmm2
+; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; SKX-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
+  %res0 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
   %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
   %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
-  %res3 = add <16 x i32> %res, %res1
-  %res4 = add <16 x i32> %res3, %res2
-  ret <16 x i32> %res4
+  %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
+  %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res3, <16 x i32> %res1, 1
+  %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res4, <16 x i32> %res2, 2
+  ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
 }
 
-define <8 x i64>@test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
+define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
 ; KNL-LABEL: test_splat_bounds_rol_v8i64:
 ; KNL:       # %bb.0:
+; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
-; KNL-NEXT:    vprolq $63, %zmm0, %zmm0
-; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    vprolq $62, %zmm0, %zmm3 {%k1}
+; KNL-NEXT:    vprolq $1, %zmm0, %zmm1 {%k1} {z}
+; KNL-NEXT:    vprolq $63, %zmm0, %zmm2
+; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_splat_bounds_rol_v8i64:
 ; SKX:       # %bb.0:
+; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; SKX-NEXT:    kmovd %edi, %k1
-; SKX-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vprolq $63, %zmm0, %zmm0
-; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    vprolq $62, %zmm0, %zmm3 {%k1}
+; SKX-NEXT:    vprolq $1, %zmm0, %zmm1 {%k1} {z}
+; SKX-NEXT:    vprolq $63, %zmm0, %zmm2
+; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; SKX-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
+  %res0 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
   %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
   %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
-  %res3 = add <8 x i64> %res, %res1
-  %res4 = add <8 x i64> %res3, %res2
-  ret <8 x i64> %res4
+  %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
+  %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res3, <8 x i64> %res1, 1
+  %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res4, <8 x i64> %res2, 2
+  ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
 }
 
-define <16 x i32> @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
+define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
 ; KNL-LABEL: test_splat_bounds_ror_v16i32:
 ; KNL:       # %bb.0:
+; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
-; KNL-NEXT:    vprord $30, %zmm0, %zmm0
-; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    vprord $1, %zmm0, %zmm3 {%k1}
+; KNL-NEXT:    vprord $31, %zmm0, %zmm1 {%k1} {z}
+; KNL-NEXT:    vprord $30, %zmm0, %zmm2
+; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_splat_bounds_ror_v16i32:
 ; SKX:       # %bb.0:
+; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; SKX-NEXT:    kmovd %edi, %k1
-; SKX-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vprord $30, %zmm0, %zmm0
-; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    vprord $1, %zmm0, %zmm3 {%k1}
+; SKX-NEXT:    vprord $31, %zmm0, %zmm1 {%k1} {z}
+; SKX-NEXT:    vprord $30, %zmm0, %zmm2
+; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; SKX-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
+  %res0 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
   %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
   %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
-  %res3 = add <16 x i32> %res, %res1
-  %res4 = add <16 x i32> %res3, %res2
-  ret <16 x i32> %res4
+  %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
+  %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res3, <16 x i32> %res1, 1
+  %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res4, <16 x i32> %res2, 2
+  ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
 }
 
-define <8 x i64>@test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
+define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
 ; KNL-LABEL: test_splat_bounds_ror_v8i64:
 ; KNL:       # %bb.0:
+; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
-; KNL-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
-; KNL-NEXT:    vprorq $63, %zmm0, %zmm0
-; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    vprorq $62, %zmm0, %zmm3 {%k1}
+; KNL-NEXT:    vprorq $1, %zmm0, %zmm1 {%k1} {z}
+; KNL-NEXT:    vprorq $63, %zmm0, %zmm2
+; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: test_splat_bounds_ror_v8i64:
 ; SKX:       # %bb.0:
+; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; SKX-NEXT:    kmovd %edi, %k1
-; SKX-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
-; SKX-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
-; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
-; SKX-NEXT:    vprorq $63, %zmm0, %zmm0
-; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    vprorq $62, %zmm0, %zmm3 {%k1}
+; SKX-NEXT:    vprorq $1, %zmm0, %zmm1 {%k1} {z}
+; SKX-NEXT:    vprorq $63, %zmm0, %zmm2
+; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
 ; SKX-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
+  %res0 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
   %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
   %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
-  %res3 = add <8 x i64> %res, %res1
-  %res4 = add <8 x i64> %res3, %res2
-  ret <8 x i64> %res4
+  %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
+  %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res3, <8 x i64> %res1, 1
+  %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res4, <8 x i64> %res2, 2
+  ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
 }
 
 ; Constant folding
