[llvm] [SelectionDAG] Convert to or mask if all insertions are -1 (PR #138213)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri May 2 09:29:25 PDT 2025
================
@@ -150,59 +150,32 @@ define <4 x i32> @insert_v4i32_01x3(<4 x i32> %a) {
define <8 x i32> @insert_v8i32_x12345x7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_x12345x7:
; SSE2: # %bb.0:
-; SSE2-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
-; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
-; SSE2-NEXT: movl $-1, %eax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
+; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v8i32_x12345x7:
; SSE3: # %bb.0:
-; SSE3-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
-; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
-; SSE3-NEXT: movl $-1, %eax
-; SSE3-NEXT: movd %eax, %xmm2
-; SSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
-; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
+; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v8i32_x12345x7:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
-; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
-; SSSE3-NEXT: movl $-1, %eax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
+; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v8i32_x12345x7:
; SSE41: # %bb.0:
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
----------------
topperc wrote:
What is the speed cost? If the source and destionation are in the FP domain there shouldn't be a cost.
https://github.com/llvm/llvm-project/pull/138213
More information about the llvm-commits
mailing list