[llvm] 391fa37 - [X86] Remove Commutable flag from mpsadbw intrinsics.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 19 13:25:53 PDT 2021


Author: Craig Topper
Date: 2021-09-19T13:22:22-07:00
New Revision: 391fa371fdfbc5ea4d4a924aebb27cb77d483da4

URL: https://github.com/llvm/llvm-project/commit/391fa371fdfbc5ea4d4a924aebb27cb77d483da4
DIFF: https://github.com/llvm/llvm-project/commit/391fa371fdfbc5ea4d4a924aebb27cb77d483da4.diff

LOG: [X86] Remove Commutable flag from mpsadbw intrinsics.

Unlike psadbw, mpsadbw is not commutable because of how it operates
on blocks. We had already marked it as not commutable for MachineIR, but
it was still marked commutable in the TableGen'd isel patterns.

Fixes PR51908.
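
As an illustration (not part of the original commit message), here is a
minimal C sketch using the SSE4.1 intrinsic _mm_mpsadbw_epu8 from
<smmintrin.h>. The immediate selects a fixed 4-byte block from the second
source and an 11-byte sliding window from the first source, so swapping the
two sources generally changes every result word (compile with -msse4.1):

  #include <smmintrin.h>
  #include <stdint.h>
  #include <stdio.h>

  int main(void) {
    uint8_t lhs[16], rhs[16];
    for (int i = 0; i < 16; ++i) {
      lhs[i] = (uint8_t)i;        /* 0, 1, 2, ...  */
      rhs[i] = (uint8_t)(2 * i);  /* 0, 2, 4, ...  */
    }
    __m128i a = _mm_loadu_si128((const __m128i *)lhs);
    __m128i b = _mm_loadu_si128((const __m128i *)rhs);

    /* Same immediate, operands swapped: imm8=7 picks bytes 4..14 of the
       first source as the sliding window and bytes 12..15 of the second
       source as the fixed block, so these calls are not equivalent. */
    __m128i ab = _mm_mpsadbw_epu8(a, b, 7);
    __m128i ba = _mm_mpsadbw_epu8(b, a, 7);

    uint16_t wab[8], wba[8];
    _mm_storeu_si128((__m128i *)wab, ab);
    _mm_storeu_si128((__m128i *)wba, ba);
    for (int i = 0; i < 8; ++i)
      printf("word %d: mpsadbw(a,b)=%u  mpsadbw(b,a)=%u\n", i, wab[i], wba[i]);
    return 0;
  }

With these inputs the first result word already differs (86 vs. 10 under the
semantics documented in the Intel SDM), which is why commuting the operands
to fold a load is a miscompile.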

Added: 
    

Modified: 
    llvm/include/llvm/IR/IntrinsicsX86.td
    llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
    llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 2601f96696ac2..1b941e84b241c 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -792,7 +792,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_mpsadbw         : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
           Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty],
-                    [IntrNoMem, Commutative, ImmArg<ArgIndex<2>>]>;
+                    [IntrNoMem, ImmArg<ArgIndex<2>>]>;
 }
 
 // Test instruction with bitwise comparison.
@@ -1779,7 +1779,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                          llvm_v32i8_ty], [IntrNoMem]>;
   def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
               Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
-                         llvm_i8_ty], [IntrNoMem, Commutative, ImmArg<ArgIndex<2>>]>;
+                         llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
 }
 
 //===----------------------------------------------------------------------===//

diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 29b32e7beb167..ca67da6de8c27 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -731,18 +731,33 @@ define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
 }
 declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
 
-; FIXME: We shouldn't commute this operation to fold the load.
+; We shouldn't commute this operation to fold the load.
 define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(<32 x i8>* %ptr, <32 x i8> %a1) {
-; X86-LABEL: test_x86_avx2_mpsadbw_load_op0:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vmpsadbw $7, (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x42,0x00,0x07]
-; X86-NEXT:    retl # encoding: [0xc3]
+; X86-AVX-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX-NEXT:    vmovdqa (%eax), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x08]
+; X86-AVX-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
+; X86-AVX-NEXT:    retl # encoding: [0xc3]
 ;
-; X64-LABEL: test_x86_avx2_mpsadbw_load_op0:
-; X64:       # %bb.0:
-; X64-NEXT:    vmpsadbw $7, (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x42,0x07,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
+; X86-AVX512VL-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X86-AVX512VL:       # %bb.0:
+; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX512VL-NEXT:    vmovdqa (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x08]
+; X86-AVX512VL-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
+; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
+;
+; X64-AVX-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovdqa (%rdi), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x0f]
+; X64-AVX-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
+; X64-AVX-NEXT:    retq # encoding: [0xc3]
+;
+; X64-AVX512VL-LABEL: test_x86_avx2_mpsadbw_load_op0:
+; X64-AVX512VL:       # %bb.0:
+; X64-AVX512VL-NEXT:    vmovdqa (%rdi), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x0f]
+; X64-AVX512VL-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
+; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %a0 = load <32 x i8>, <32 x i8>* %ptr
   %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
   ret <16 x i16> %res

diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
index 66e7836f3fb6c..3f7392222db8b 100644
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -116,39 +116,47 @@ define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
 }
 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
 
-; FIXME: We shouldn't commute this operation to fold the load.
+; We shouldn't commute this operation to fold the load.
 define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(<16 x i8>* %ptr, <16 x i8> %a1) {
 ; X86-SSE-LABEL: test_x86_sse41_mpsadbw_load_op0:
 ; X86-SSE:       ## %bb.0:
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT:    mpsadbw $7, (%eax), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x42,0x00,0x07]
+; X86-SSE-NEXT:    movdqa (%eax), %xmm1 ## encoding: [0x66,0x0f,0x6f,0x08]
+; X86-SSE-NEXT:    mpsadbw $7, %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x3a,0x42,0xc8,0x07]
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc1]
 ; X86-SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X86-AVX1-LABEL: test_x86_sse41_mpsadbw_load_op0:
 ; X86-AVX1:       ## %bb.0:
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT:    vmpsadbw $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x00,0x07]
+; X86-AVX1-NEXT:    vmovdqa (%eax), %xmm1 ## encoding: [0xc5,0xf9,0x6f,0x08]
+; X86-AVX1-NEXT:    vmpsadbw $7, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x42,0xc0,0x07]
 ; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X86-AVX512-LABEL: test_x86_sse41_mpsadbw_load_op0:
 ; X86-AVX512:       ## %bb.0:
 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT:    vmpsadbw $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x00,0x07]
+; X86-AVX512-NEXT:    vmovdqa (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x08]
+; X86-AVX512-NEXT:    vmpsadbw $7, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x42,0xc0,0x07]
 ; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-SSE-LABEL: test_x86_sse41_mpsadbw_load_op0:
 ; X64-SSE:       ## %bb.0:
-; X64-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x42,0x07,0x07]
+; X64-SSE-NEXT:    movdqa (%rdi), %xmm1 ## encoding: [0x66,0x0f,0x6f,0x0f]
+; X64-SSE-NEXT:    mpsadbw $7, %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x3a,0x42,0xc8,0x07]
+; X64-SSE-NEXT:    movdqa %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc1]
 ; X64-SSE-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX1-LABEL: test_x86_sse41_mpsadbw_load_op0:
 ; X64-AVX1:       ## %bb.0:
-; X64-AVX1-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x07,0x07]
+; X64-AVX1-NEXT:    vmovdqa (%rdi), %xmm1 ## encoding: [0xc5,0xf9,0x6f,0x0f]
+; X64-AVX1-NEXT:    vmpsadbw $7, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x42,0xc0,0x07]
 ; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX512-LABEL: test_x86_sse41_mpsadbw_load_op0:
 ; X64-AVX512:       ## %bb.0:
-; X64-AVX512-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0x07,0x07]
+; X64-AVX512-NEXT:    vmovdqa (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x0f]
+; X64-AVX512-NEXT:    vmpsadbw $7, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x42,0xc0,0x07]
 ; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
   %a0 = load <16 x i8>, <16 x i8>* %ptr
   %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]

