[llvm] 417cd33 - [X86] Add test showing failure to constant fold (V)PMADDUBSW nodes

Sat Jun 15 10:32:19 PDT 2024

Author: Simon Pilgrim
Date: 2024-06-15T18:12:26+01:00
New Revision: 417cd33f6d155d6d16ea29e9bcd81bb4708c9cfa

URL: https://github.com/llvm/llvm-project/commit/417cd33f6d155d6d16ea29e9bcd81bb4708c9cfa
DIFF: https://github.com/llvm/llvm-project/commit/417cd33f6d155d6d16ea29e9bcd81bb4708c9cfa.diff

LOG: [X86] Add test showing failure to constant fold (V)PMADDUBSW nodes

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/combine-pmadd.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/combine-pmadd.ll b/llvm/test/CodeGen/X86/combine-pmadd.ll
index 8a6adbdeb64d8..8c8da55503aa1 100644

--- a/llvm/test/CodeGen/X86/combine-pmadd.ll
+++ b/llvm/test/CodeGen/X86/combine-pmadd.ll
@@ -72,3 +72,47 @@ define <8 x i16> @combine_pmaddubsw_zero_commute(<16 x i8> %a0, <16 x i8> %a1) {
   ret <8 x i16> %1
 }
 
+define i32 @combine_pmaddubsw_constant() {
+; SSE-LABEL: combine_pmaddubsw_constant:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
+; SSE-NEXT:    pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
+; SSE-NEXT:    pextrw $3, %xmm0, %eax
+; SSE-NEXT:    cwtl
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_pmaddubsw_constant:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
+; AVX-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
+; AVX-NEXT:    vpextrw $3, %xmm0, %eax
+; AVX-NEXT:    cwtl
+; AVX-NEXT:    retq
+  %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 -6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>)
+  %2 = extractelement <8 x i16> %1, i32 3 ; ((uint8_t)-6*7)+(7*-8) = (250*7)+(7*-8) = 1694
+  %3 = sext i16 %2 to i32
+  ret i32 %3
+}
+
+define i32 @combine_pmaddubsw_constant_sat() {
+; SSE-LABEL: combine_pmaddubsw_constant_sat:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
+; SSE-NEXT:    pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,128,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    cwtl
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_pmaddubsw_constant_sat:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [255,255,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
+; AVX-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,128,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    cwtl
+; AVX-NEXT:    retq
+  %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> <i8 -1, i8 -1, i8 2, i8 3, i8 4, i8 5, i8 -6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> <i8 -128, i8 -128, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>)
+  %2 = extractelement <8 x i16> %1, i32 0 ; add_sat_i16(((uint8_t)-1*-128),((uint8_t)-1*-128)) = add_sat_i16((255*-128),(255*-128)) = sat_i16(-65280) = -32768
+  %3 = sext i16 %2 to i32
+  ret i32 %3
+}
+