[llvm] r338795 - [X86] Prevent promotion of i16 add/sub/and/or/xor to i32 if we can fold an atomic load and atomic store.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 2 17:37:34 PDT 2018
Author: ctopper
Date: Thu Aug 2 17:37:34 2018
New Revision: 338795
URL: http://llvm.org/viewvc/llvm-project?rev=338795&view=rev
Log:
[X86] Prevent promotion of i16 add/sub/and/or/xor to i32 if we can fold an atomic load and atomic store.
This makes them consistent with i8/i32/i64; even so, we still seem to fold more aggressively than icc, gcc, or MSVC.
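For context, the pattern being targeted is a 16-bit atomic read-modify-write. A rough source-level rendering of the add_16i test updated below (this C++ version is mine, not part of the commit; the test pairs an acquire load with a release store):

#include <atomic>
#include <cstdint>

// Before this change the i16 add was promoted to i32, producing a
// movw/addl/movw sequence; afterwards the backend can fold the whole
// RMW into a single instruction such as "addw $2, (%rdi)".
void add_16i(std::atomic<uint16_t> *p) {
  uint16_t v = p->load(std::memory_order_acquire);
  p->store(static_cast<uint16_t>(v + 2), std::memory_order_release);
}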
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrCompiler.td
llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
llvm/trunk/test/CodeGen/X86/atomic_mi.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=338795&r1=338794&r2=338795&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Aug 2 17:37:34 2018
@@ -39768,6 +39768,19 @@ bool X86TargetLowering::IsDesirableToPro
return Ld->getBasePtr() == St->getBasePtr();
};
+ auto IsFoldableAtomicRMW = [](SDValue Load, SDValue Op) {
+ if (!Load.hasOneUse() || Load.getOpcode() != ISD::ATOMIC_LOAD)
+ return false;
+ if (!Op.hasOneUse())
+ return false;
+ SDNode *User = *Op->use_begin();
+ if (User->getOpcode() != ISD::ATOMIC_STORE)
+ return false;
+ auto *Ld = cast<AtomicSDNode>(Load);
+ auto *St = cast<AtomicSDNode>(User);
+ return Ld->getBasePtr() == St->getBasePtr();
+ };
+
bool Commute = false;
switch (Op.getOpcode()) {
default: return false;
@@ -39802,6 +39815,9 @@ bool X86TargetLowering::IsDesirableToPro
((Commute && !isa<ConstantSDNode>(N1)) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
return false;
+ if (IsFoldableAtomicRMW(N0, Op) ||
+ (Commute && IsFoldableAtomicRMW(N1, Op)))
+ return false;
}
}
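The new IsFoldableAtomicRMW lambda is the atomic analogue of the existing IsFoldableRMW check: it succeeds only when the operand is a single-use ISD::ATOMIC_LOAD and the arithmetic result's sole user is an ISD::ATOMIC_STORE back to the same base pointer. Restated outside the diff with comments, as a sketch (the free-function form and include are mine):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isFoldableAtomicRMW(SDValue Load, SDValue Op) {
  // The load must be atomic and feed only this arithmetic op.
  if (!Load.hasOneUse() || Load.getOpcode() != ISD::ATOMIC_LOAD)
    return false;
  // The op's only consumer must be an atomic store...
  if (!Op.hasOneUse())
    return false;
  SDNode *User = *Op->use_begin();
  if (User->getOpcode() != ISD::ATOMIC_STORE)
    return false;
  // ...to the same address, so isel can fold the whole RMW and
  // IsDesirableToPromoteOp can safely refuse the i16 -> i32 promotion.
  auto *Ld = cast<AtomicSDNode>(Load);
  auto *St = cast<AtomicSDNode>(User);
  return Ld->getBasePtr() == St->getBasePtr();
}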
Modified: llvm/trunk/lib/Target/X86/X86InstrCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrCompiler.td?rev=338795&r1=338794&r2=338795&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td Thu Aug 2 17:37:34 2018
@@ -896,8 +896,14 @@ multiclass RELEASE_BINOP_MI<SDNode op> {
"#BINOP "#NAME#"8mr PSEUDO!",
[(atomic_store_8 addr:$dst, (op
(atomic_load_8 addr:$dst), GR8:$src))]>;
- // NAME#16 is not generated as 16-bit arithmetic instructions are considered
- // costly and avoided as far as possible by this backend anyway
+ def NAME#16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
+ "#BINOP "#NAME#"16mi PSEUDO!",
+ [(atomic_store_16 addr:$dst, (op
+ (atomic_load_16 addr:$dst), (i16 imm:$src)))]>;
+ def NAME#16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
+ "#BINOP "#NAME#"16mr PSEUDO!",
+ [(atomic_store_16 addr:$dst, (op
+ (atomic_load_16 addr:$dst), GR16:$src))]>;
def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
"#BINOP "#NAME#"32mi PSEUDO!",
[(atomic_store_32 addr:$dst, (op
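The new 16mi/16mr pair mirrors the existing 8/32/64-bit pseudos for the immediate and register operand forms. For the register form, a rough C++ equivalent of the add_16r test updated below (again my rendering, with acquire/release orderings as in the test):

#include <atomic>
#include <cstdint>

// Matches the NAME#16mr pattern: atomic load, op with a register value,
// atomic store back to the same address; now foldable into
// "addw %si, (%rdi)" per the test diff below.
void add_16r(std::atomic<uint16_t> *p, uint16_t v) {
  uint16_t t = p->load(std::memory_order_acquire);
  p->store(static_cast<uint16_t>(t + v), std::memory_order_release);
}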
Modified: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MCInstLower.cpp?rev=338795&r1=338794&r2=338795&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp Thu Aug 2 17:37:34 2018
@@ -601,24 +601,32 @@ ReSimplify:
case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify;
case X86::RELEASE_ADD8mr: OutMI.setOpcode(X86::ADD8mr); goto ReSimplify;
+ case X86::RELEASE_ADD16mi: OutMI.setOpcode(X86::ADD16mi); goto ReSimplify;
+ case X86::RELEASE_ADD16mr: OutMI.setOpcode(X86::ADD16mr); goto ReSimplify;
case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify;
case X86::RELEASE_ADD32mr: OutMI.setOpcode(X86::ADD32mr); goto ReSimplify;
case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
case X86::RELEASE_ADD64mr: OutMI.setOpcode(X86::ADD64mr); goto ReSimplify;
case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify;
case X86::RELEASE_AND8mr: OutMI.setOpcode(X86::AND8mr); goto ReSimplify;
+ case X86::RELEASE_AND16mi: OutMI.setOpcode(X86::AND16mi); goto ReSimplify;
+ case X86::RELEASE_AND16mr: OutMI.setOpcode(X86::AND16mr); goto ReSimplify;
case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify;
case X86::RELEASE_AND32mr: OutMI.setOpcode(X86::AND32mr); goto ReSimplify;
case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
case X86::RELEASE_AND64mr: OutMI.setOpcode(X86::AND64mr); goto ReSimplify;
case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify;
case X86::RELEASE_OR8mr: OutMI.setOpcode(X86::OR8mr); goto ReSimplify;
+ case X86::RELEASE_OR16mi: OutMI.setOpcode(X86::OR16mi); goto ReSimplify;
+ case X86::RELEASE_OR16mr: OutMI.setOpcode(X86::OR16mr); goto ReSimplify;
case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify;
case X86::RELEASE_OR32mr: OutMI.setOpcode(X86::OR32mr); goto ReSimplify;
case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify;
case X86::RELEASE_OR64mr: OutMI.setOpcode(X86::OR64mr); goto ReSimplify;
case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify;
case X86::RELEASE_XOR8mr: OutMI.setOpcode(X86::XOR8mr); goto ReSimplify;
+ case X86::RELEASE_XOR16mi: OutMI.setOpcode(X86::XOR16mi); goto ReSimplify;
+ case X86::RELEASE_XOR16mr: OutMI.setOpcode(X86::XOR16mr); goto ReSimplify;
case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify;
case X86::RELEASE_XOR32mr: OutMI.setOpcode(X86::XOR32mr); goto ReSimplify;
case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;
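Each RELEASE_* pseudo is lowered by overwriting the opcode on the already-built MCInst and jumping back to the ReSimplify label, so the rewritten instruction passes through the same switch again. A toy standalone version of that technique (the enumerators here are hypothetical stand-ins, not the real X86:: opcodes):

#include <cstdio>

enum Opcode { RELEASE_ADD16mi, ADD16mi };

static Opcode lowerPseudo(Opcode Op) {
ReSimplify:
  switch (Op) {
  case RELEASE_ADD16mi:
    // Swap in the real opcode, then re-run the switch in case the
    // rewritten instruction itself needs further simplification.
    Op = ADD16mi;
    goto ReSimplify;
  default:
    return Op; // Already a real instruction; nothing to do.
  }
}

int main() {
  // Lowers the release pseudo to the plain memory-operand add.
  std::printf("%d\n", lowerPseudo(RELEASE_ADD16mi) == ADD16mi);
  return 0;
}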
Modified: llvm/trunk/test/CodeGen/X86/atomic_mi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic_mi.ll?rev=338795&r1=338794&r2=338795&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic_mi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic_mi.ll Thu Aug 2 17:37:34 2018
@@ -209,17 +209,13 @@ define void @add_16i(i16* %p) {
; treat 16 bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: add_16i:
; X64: # %bb.0:
-; X64-NEXT: movw (%rdi), %ax
-; X64-NEXT: addl $2, %eax
-; X64-NEXT: movw %ax, (%rdi)
+; X64-NEXT: addw $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_16i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movw (%eax), %cx
-; X32-NEXT: addl $2, %ecx
-; X32-NEXT: movw %cx, (%eax)
+; X32-NEXT: addw $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = add i16 %1, 2
@@ -232,17 +228,14 @@ define void @add_16r(i16* %p, i16 %v) {
; treat 16 bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: add_16r:
; X64: # %bb.0:
-; X64-NEXT: movw (%rdi), %ax
-; X64-NEXT: addl %esi, %eax
-; X64-NEXT: movw %ax, (%rdi)
+; X64-NEXT: addw %si, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_16r:
; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movw (%eax), %cx
-; X32-NEXT: addw {{[0-9]+}}(%esp), %cx
-; X32-NEXT: movw %cx, (%eax)
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: addw %ax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = add i16 %1, %v
@@ -506,17 +499,13 @@ define void @and_16i(i16* %p) {
; treat 16 bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: and_16i:
; X64: # %bb.0:
-; X64-NEXT: movw (%rdi), %ax
-; X64-NEXT: andl $2, %eax
-; X64-NEXT: movw %ax, (%rdi)
+; X64-NEXT: andw $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: and_16i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movw (%eax), %cx
-; X32-NEXT: andl $2, %ecx
-; X32-NEXT: movw %cx, (%eax)
+; X32-NEXT: andw $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = and i16 %1, 2
@@ -529,17 +518,14 @@ define void @and_16r(i16* %p, i16 %v) {
; treat 16 bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: and_16r:
; X64: # %bb.0:
-; X64-NEXT: movw (%rdi), %ax
-; X64-NEXT: andl %esi, %eax
-; X64-NEXT: movw %ax, (%rdi)
+; X64-NEXT: andw %si, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: and_16r:
; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movw (%eax), %cx
-; X32-NEXT: andw {{[0-9]+}}(%esp), %cx
-; X32-NEXT: movw %cx, (%eax)
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: andw %ax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = and i16 %1, %v
@@ -751,17 +737,13 @@ define void @or_8r(i8* %p, i8 %v) {
define void @or_16i(i16* %p) {
; X64-LABEL: or_16i:
; X64: # %bb.0:
-; X64-NEXT: movw (%rdi), %ax
-; X64-NEXT: orl $2, %eax
-; X64-NEXT: movw %ax, (%rdi)
+; X64-NEXT: orw $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: or_16i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movw (%eax), %cx
-; X32-NEXT: orl $2, %ecx
-; X32-NEXT: movw %cx, (%eax)
+; X32-NEXT: orw $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = or i16 %1, 2
@@ -772,17 +754,14 @@ define void @or_16i(i16* %p) {
define void @or_16r(i16* %p, i16 %v) {
; X64-LABEL: or_16r:
; X64: # %bb.0:
-; X64-NEXT: movw (%rdi), %ax
-; X64-NEXT: orl %esi, %eax
-; X64-NEXT: movw %ax, (%rdi)
+; X64-NEXT: orw %si, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: or_16r:
; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movw (%eax), %cx
-; X32-NEXT: orw {{[0-9]+}}(%esp), %cx
-; X32-NEXT: movw %cx, (%eax)
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: orw %ax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = or i16 %1, %v
@@ -994,17 +973,13 @@ define void @xor_8r(i8* %p, i8 %v) {
define void @xor_16i(i16* %p) {
; X64-LABEL: xor_16i:
; X64: # %bb.0:
-; X64-NEXT: movw (%rdi), %ax
-; X64-NEXT: xorl $2, %eax
-; X64-NEXT: movw %ax, (%rdi)
+; X64-NEXT: xorw $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: xor_16i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movw (%eax), %cx
-; X32-NEXT: xorl $2, %ecx
-; X32-NEXT: movw %cx, (%eax)
+; X32-NEXT: xorw $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = xor i16 %1, 2
@@ -1015,17 +990,14 @@ define void @xor_16i(i16* %p) {
define void @xor_16r(i16* %p, i16 %v) {
; X64-LABEL: xor_16r:
; X64: # %bb.0:
-; X64-NEXT: movw (%rdi), %ax
-; X64-NEXT: xorl %esi, %eax
-; X64-NEXT: movw %ax, (%rdi)
+; X64-NEXT: xorw %si, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: xor_16r:
; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movw (%eax), %cx
-; X32-NEXT: xorw {{[0-9]+}}(%esp), %cx
-; X32-NEXT: movw %cx, (%eax)
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: xorw %ax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = xor i16 %1, %v
@@ -1226,24 +1198,18 @@ define void @inc_16(i16* %p) {
; treat 16 bit arithmetic as expensive on X86/X86_64.
; FAST_INC-LABEL: inc_16:
; FAST_INC: # %bb.0:
-; FAST_INC-NEXT: movw (%rdi), %ax
-; FAST_INC-NEXT: incl %eax
-; FAST_INC-NEXT: movw %ax, (%rdi)
+; FAST_INC-NEXT: incw (%rdi)
; FAST_INC-NEXT: retq
;
; X32-LABEL: inc_16:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movw (%eax), %cx
-; X32-NEXT: incl %ecx
-; X32-NEXT: movw %cx, (%eax)
+; X32-NEXT: incw (%eax)
; X32-NEXT: retl
;
; SLOW_INC-LABEL: inc_16:
; SLOW_INC: # %bb.0:
-; SLOW_INC-NEXT: movw (%rdi), %ax
-; SLOW_INC-NEXT: addl $1, %eax
-; SLOW_INC-NEXT: movw %ax, (%rdi)
+; SLOW_INC-NEXT: addw $1, (%rdi)
; SLOW_INC-NEXT: retq
%1 = load atomic i16, i16* %p acquire, align 2
%2 = add i16 %1, 1
@@ -1379,24 +1345,18 @@ define void @dec_16(i16* %p) {
; treat 16 bit arithmetic as expensive on X86/X86_64.
; FAST_INC-LABEL: dec_16:
; FAST_INC: # %bb.0:
-; FAST_INC-NEXT: movw (%rdi), %ax
-; FAST_INC-NEXT: decl %eax
-; FAST_INC-NEXT: movw %ax, (%rdi)
+; FAST_INC-NEXT: decw (%rdi)
; FAST_INC-NEXT: retq
;
; X32-LABEL: dec_16:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movw (%eax), %cx
-; X32-NEXT: decl %ecx
-; X32-NEXT: movw %cx, (%eax)
+; X32-NEXT: decw (%eax)
; X32-NEXT: retl
;
; SLOW_INC-LABEL: dec_16:
; SLOW_INC: # %bb.0:
-; SLOW_INC-NEXT: movw (%rdi), %ax
-; SLOW_INC-NEXT: addl $-1, %eax
-; SLOW_INC-NEXT: movw %ax, (%rdi)
+; SLOW_INC-NEXT: addw $-1, (%rdi)
; SLOW_INC-NEXT: retq
%1 = load atomic i16, i16* %p acquire, align 2
%2 = sub i16 %1, 1
@@ -1527,17 +1487,13 @@ define void @not_16(i16* %p) {
; treat 16 bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: not_16:
; X64: # %bb.0:
-; X64-NEXT: movw (%rdi), %ax
-; X64-NEXT: notl %eax
-; X64-NEXT: movw %ax, (%rdi)
+; X64-NEXT: notw (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: not_16:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movw (%eax), %cx
-; X32-NEXT: notl %ecx
-; X32-NEXT: movw %cx, (%eax)
+; X32-NEXT: notw (%eax)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = xor i16 %1, -1
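The updated FileCheck expectations above (X64, X32, FAST_INC, SLOW_INC prefixes) can be re-verified with llc. The authoritative RUN lines live at the top of atomic_mi.ll and are not shown in this diff, so the invocation below is only an approximation:

llc -mtriple=x86_64-unknown-unknown < atomic_mi.ll | FileCheck atomic_mi.ll --check-prefix=X64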