[llvm] f5700e7 - [DAGCombine][X86] Pull one-use `freeze` out of `extract_vector_elt` vector operand
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 22 13:03:49 PST 2022
Author: Roman Lebedev
Date: 2022-12-23T00:03:26+03:00
New Revision: f5700e7b69048de958172fb513b336564e7f8709
URL: https://github.com/llvm/llvm-project/commit/f5700e7b69048de958172fb513b336564e7f8709
DIFF: https://github.com/llvm/llvm-project/commit/f5700e7b69048de958172fb513b336564e7f8709.diff
LOG: [DAGCombine][X86] Pull one-use `freeze` out of `extract_vector_elt` vector operand
This may allow us to further simplify the vector operand,
and freezing the extracted scalar instead is still correct, as the following proof shows:
```
----------------------------------------
define i8 @src(<2 x i8> %src, i64 %idx) {
%0:
%i1 = freeze <2 x i8> %src
%i2 = extractelement <2 x i8> %i1, i64 %idx
ret i8 %i2
}
=>
define i8 @tgt(<2 x i8> %src, i64 %idx) {
%0:
%i1 = extractelement <2 x i8> %src, i64 %idx
%i2 = freeze i8 %i1
ret i8 %i2
}
Transformation seems to be correct!
```
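However, the freeze must not have any other uses: all users of a single freeze
must observe the same value, which would no longer be guaranteed if only this
use were rewritten; see `@freeze_extractelement_extra_use`.
Also, it looks like we are missing some ISel-level handling for freeze
(e.g. `vpextrb` is no longer folded into the store in `@freeze_extractelement`).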
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/freeze-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c62de472e66a..5c074c84f195 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20281,6 +20281,12 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
return DAG.getUNDEF(ScalarVT);
+ // extract_vector_elt(freeze(x), idx) -> freeze(extract_vector_elt(x, idx))
+ if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) {
+ return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
+ VecOp.getOperand(0), Index));
+ }
+
// extract_vector_elt (build_vector x, y), 1 -> y
if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
diff --git a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll
index c10e35c2cf0d..df6b76e7820e 100644
--- a/llvm/test/CodeGen/X86/freeze-vector.ll
+++ b/llvm/test/CodeGen/X86/freeze-vector.ll
@@ -65,13 +65,13 @@ define void @freeze_bitcast_from_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-NEXT: vmovlps %xmm0, (%eax)
+; X86-NEXT: vmovsd %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt:
; X64: # %bb.0:
-; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: vmovlps %xmm0, (%rsi)
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, (%rsi)
; X64-NEXT: retq
%i0 = load <4 x i16>, ptr %origin
%i1 = bitcast <4 x i16> %i0 to <8 x i8>
@@ -88,15 +88,14 @@ define void @freeze_bitcast_from_wider_elt_escape(ptr %origin, ptr %escape, ptr
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vmovsd %xmm0, (%ecx)
-; X86-NEXT: vmovlps %xmm0, (%eax)
+; X86-NEXT: vmovsd %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt_escape:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: vmovq %rax, %xmm0
; X64-NEXT: movq %rax, (%rsi)
-; X64-NEXT: vmovq %xmm0, (%rdx)
+; X64-NEXT: movq %rax, (%rdx)
; X64-NEXT: retq
%i0 = load <4 x i16>, ptr %origin
%i1 = bitcast <4 x i16> %i0 to <8 x i8>
@@ -113,13 +112,13 @@ define void @freeze_bitcast_to_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-NEXT: vmovlps %xmm0, (%eax)
+; X86-NEXT: vmovsd %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt:
; X64: # %bb.0:
-; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: vmovlps %xmm0, (%rsi)
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, (%rsi)
; X64-NEXT: retq
%i0 = load <8 x i8>, ptr %origin
%i1 = bitcast <8 x i8> %i0 to <4 x i16>
@@ -136,15 +135,14 @@ define void @freeze_bitcast_to_wider_elt_escape(ptr %origin, ptr %escape, ptr %d
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vmovsd %xmm0, (%ecx)
-; X86-NEXT: vmovlps %xmm0, (%eax)
+; X86-NEXT: vmovsd %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt_escape:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: vmovq %rax, %xmm0
; X64-NEXT: movq %rax, (%rsi)
-; X64-NEXT: vmovq %xmm0, (%rdx)
+; X64-NEXT: movq %rax, (%rdx)
; X64-NEXT: retq
%i0 = load <8 x i8>, ptr %origin
%i1 = bitcast <8 x i8> %i0 to <4 x i16>
@@ -163,14 +161,16 @@ define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwin
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vmovdqa (%edx), %xmm0
; X86-NEXT: vpand (%ecx), %xmm0, %xmm0
-; X86-NEXT: vpextrb $6, %xmm0, (%eax)
+; X86-NEXT: vpextrb $6, %xmm0, %ecx
+; X86-NEXT: movb %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: freeze_extractelement:
; X64: # %bb.0:
; X64-NEXT: vmovdqa (%rdi), %xmm0
; X64-NEXT: vpand (%rsi), %xmm0, %xmm0
-; X64-NEXT: vpextrb $6, %xmm0, (%rdx)
+; X64-NEXT: vpextrb $6, %xmm0, %eax
+; X64-NEXT: movb %al, (%rdx)
; X64-NEXT: retq
%i0 = load <16 x i8>, ptr %origin0
%i1 = load <16 x i8>, ptr %origin1
@@ -211,6 +211,8 @@ define void @freeze_extractelement_escape(ptr %origin0, ptr %origin1, ptr %dst,
store i8 %i4, ptr %dst
ret void
}
+
+; It would be a miscompilation to pull freeze out of extractelement here.
define void @freeze_extractelement_extra_use(ptr %origin0, ptr %origin1, i64 %idx0, i64 %idx1, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_extractelement_extra_use:
; X86: # %bb.0:
More information about the llvm-commits
mailing list