[llvm] r356170 - Allow code motion (and thus folding) for atomic (but unordered) memory operands

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 14 10:20:59 PDT 2019


Author: reames
Date: Thu Mar 14 10:20:59 2019
New Revision: 356170

URL: http://llvm.org/viewvc/llvm-project?rev=356170&view=rev
Log:
Allow code motion (and thus folding) for atomic (but unordered) memory operands

Building on the work done in D57601, we can now distinguish between atomic and volatile memory accesses, so go ahead and allow code motion of unordered atomics. As seen in the test diffs, this allows much better folding of memory operations into the instructions that use them (mostly done by the PeepholeOpt pass).

Note: I have not reviewed all callers of hasOrderedMemoryRef, since one of them - isSafeToMove - is very widely used. Instead, I'm relying on the documented semantics of each method to judge correctness.

Differential Revision: https://reviews.llvm.org/D59345


Modified:
    llvm/trunk/lib/CodeGen/MachineInstr.cpp
    llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll
    llvm/trunk/test/CodeGen/X86/atomic-unordered.ll

Modified: llvm/trunk/lib/CodeGen/MachineInstr.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineInstr.cpp?rev=356170&r1=356169&r2=356170&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineInstr.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineInstr.cpp Thu Mar 14 10:20:59 2019
@@ -1291,10 +1291,8 @@ bool MachineInstr::hasOrderedMemoryRef()
     return true;
 
   // Check if any of our memory operands are ordered.
-  // TODO: This should probably be be isUnordered (see D57601), but the callers
-  // need audited and test cases written to be sure.
   return llvm::any_of(memoperands(), [](const MachineMemOperand *MMO) {
-    return MMO->isVolatile() || MMO->isAtomic();
+    return !MMO->isUnordered();
   });
 }
 

Modified: llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll?rev=356170&r1=356169&r2=356170&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll Thu Mar 14 10:20:59 2019
@@ -62,8 +62,7 @@ define half @load_half(half* %fptr) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movzwl (%rdi), %eax
-; CHECK-NEXT:    movzwl %ax, %edi
+; CHECK-NEXT:    movzwl (%rdi), %edi
 ; CHECK-NEXT:    callq __gnu_h2f_ieee
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
@@ -75,8 +74,7 @@ define half @load_half(half* %fptr) {
 define float @load_float(float* %fptr) {
 ; CHECK-LABEL: load_float:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movd (%rdi), %xmm0
 ; CHECK-NEXT:    retq
   %v = load atomic float, float* %fptr unordered, align 4
   ret float %v
@@ -85,8 +83,7 @@ define float @load_float(float* %fptr) {
 define double @load_double(double* %fptr) {
 ; CHECK-LABEL: load_double:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    movq (%rdi), %xmm0
 ; CHECK-NEXT:    retq
   %v = load atomic double, double* %fptr unordered, align 8
   ret double %v

Modified: llvm/trunk/test/CodeGen/X86/atomic-unordered.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic-unordered.ll?rev=356170&r1=356169&r2=356170&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic-unordered.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic-unordered.ll Thu Mar 14 10:20:59 2019
@@ -437,7 +437,6 @@ define i64 @load_fold_add1(i64* %p) {
   ret i64 %ret
 }
 
-; Legal, as expected
 define i64 @load_fold_add2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_add2:
 ; CHECK-O0:       # %bb.0:
@@ -447,15 +446,14 @@ define i64 @load_fold_add2(i64* %p, i64
 ;
 ; CHECK-O3-LABEL: load_fold_add2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    addq %rsi, %rax
+; CHECK-O3-NEXT:    movq %rsi, %rax
+; CHECK-O3-NEXT:    addq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = add i64 %v, %v2
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_add3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_add3:
 ; CHECK-O0:       # %bb.0:
@@ -466,9 +464,8 @@ define i64 @load_fold_add3(i64* %p1, i64
 ;
 ; CHECK-O3-LABEL: load_fold_add3:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rcx
 ; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    addq %rcx, %rax
+; CHECK-O3-NEXT:    addq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -495,7 +492,6 @@ define i64 @load_fold_sub1(i64* %p) {
   ret i64 %ret
 }
 
-; Legal, as expected
 define i64 @load_fold_sub2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_sub2:
 ; CHECK-O0:       # %bb.0:
@@ -514,7 +510,6 @@ define i64 @load_fold_sub2(i64* %p, i64
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_sub3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_sub3:
 ; CHECK-O0:       # %bb.0:
@@ -526,8 +521,7 @@ define i64 @load_fold_sub3(i64* %p1, i64
 ; CHECK-O3-LABEL: load_fold_sub3:
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    movq (%rsi), %rcx
-; CHECK-O3-NEXT:    subq %rcx, %rax
+; CHECK-O3-NEXT:    subq (%rsi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -553,7 +547,6 @@ define i64 @load_fold_mul1(i64* %p) {
   ret i64 %ret
 }
 
-; Legal, O0 is better than O3 codegen (TODO)
 define i64 @load_fold_mul2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_mul2:
 ; CHECK-O0:       # %bb.0:
@@ -563,15 +556,14 @@ define i64 @load_fold_mul2(i64* %p, i64
 ;
 ; CHECK-O3-LABEL: load_fold_mul2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    imulq %rsi, %rax
+; CHECK-O3-NEXT:    movq %rsi, %rax
+; CHECK-O3-NEXT:    imulq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = mul i64 %v, %v2
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_mul3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_mul3:
 ; CHECK-O0:       # %bb.0:
@@ -582,9 +574,8 @@ define i64 @load_fold_mul3(i64* %p1, i64
 ;
 ; CHECK-O3-LABEL: load_fold_mul3:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rcx
 ; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    imulq %rcx, %rax
+; CHECK-O3-NEXT:    imulq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -639,7 +630,6 @@ define i64 @load_fold_sdiv2(i64* %p, i64
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_sdiv3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_sdiv3:
 ; CHECK-O0:       # %bb.0:
@@ -651,9 +641,8 @@ define i64 @load_fold_sdiv3(i64* %p1, i6
 ; CHECK-O3-LABEL: load_fold_sdiv3:
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    movq (%rsi), %rcx
 ; CHECK-O3-NEXT:    cqto
-; CHECK-O3-NEXT:    idivq %rcx
+; CHECK-O3-NEXT:    idivq (%rsi)
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -685,7 +674,6 @@ define i64 @load_fold_udiv1(i64* %p) {
   ret i64 %ret
 }
 
-; Legal, as expected
 define i64 @load_fold_udiv2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_udiv2:
 ; CHECK-O0:       # %bb.0:
@@ -706,7 +694,6 @@ define i64 @load_fold_udiv2(i64* %p, i64
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_udiv3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_udiv3:
 ; CHECK-O0:       # %bb.0:
@@ -719,9 +706,8 @@ define i64 @load_fold_udiv3(i64* %p1, i6
 ; CHECK-O3-LABEL: load_fold_udiv3:
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    movq (%rsi), %rcx
 ; CHECK-O3-NEXT:    xorl %edx, %edx
-; CHECK-O3-NEXT:    divq %rcx
+; CHECK-O3-NEXT:    divq (%rsi)
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -783,7 +769,6 @@ define i64 @load_fold_srem2(i64* %p, i64
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_srem3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_srem3:
 ; CHECK-O0:       # %bb.0:
@@ -796,9 +781,8 @@ define i64 @load_fold_srem3(i64* %p1, i6
 ; CHECK-O3-LABEL: load_fold_srem3:
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    movq (%rsi), %rcx
 ; CHECK-O3-NEXT:    cqto
-; CHECK-O3-NEXT:    idivq %rcx
+; CHECK-O3-NEXT:    idivq (%rsi)
 ; CHECK-O3-NEXT:    movq %rdx, %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
@@ -859,7 +843,6 @@ define i64 @load_fold_urem2(i64* %p, i64
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_urem3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_urem3:
 ; CHECK-O0:       # %bb.0:
@@ -873,9 +856,8 @@ define i64 @load_fold_urem3(i64* %p1, i6
 ; CHECK-O3-LABEL: load_fold_urem3:
 ; CHECK-O3:       # %bb.0:
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    movq (%rsi), %rcx
 ; CHECK-O3-NEXT:    xorl %edx, %edx
-; CHECK-O3-NEXT:    divq %rcx
+; CHECK-O3-NEXT:    divq (%rsi)
 ; CHECK-O3-NEXT:    movq %rdx, %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
@@ -1101,7 +1083,6 @@ define i64 @load_fold_and1(i64* %p) {
   ret i64 %ret
 }
 
-; Legal, as expected
 define i64 @load_fold_and2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_and2:
 ; CHECK-O0:       # %bb.0:
@@ -1111,15 +1092,14 @@ define i64 @load_fold_and2(i64* %p, i64
 ;
 ; CHECK-O3-LABEL: load_fold_and2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    andq %rsi, %rax
+; CHECK-O3-NEXT:    movq %rsi, %rax
+; CHECK-O3-NEXT:    andq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = and i64 %v, %v2
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_and3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_and3:
 ; CHECK-O0:       # %bb.0:
@@ -1130,9 +1110,8 @@ define i64 @load_fold_and3(i64* %p1, i64
 ;
 ; CHECK-O3-LABEL: load_fold_and3:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rcx
 ; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    andq %rcx, %rax
+; CHECK-O3-NEXT:    andq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -1159,7 +1138,6 @@ define i64 @load_fold_or1(i64* %p) {
   ret i64 %ret
 }
 
-; Legal, as expected
 define i64 @load_fold_or2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_or2:
 ; CHECK-O0:       # %bb.0:
@@ -1169,15 +1147,14 @@ define i64 @load_fold_or2(i64* %p, i64 %
 ;
 ; CHECK-O3-LABEL: load_fold_or2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    orq %rsi, %rax
+; CHECK-O3-NEXT:    movq %rsi, %rax
+; CHECK-O3-NEXT:    orq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = or i64 %v, %v2
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_or3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_or3:
 ; CHECK-O0:       # %bb.0:
@@ -1188,9 +1165,8 @@ define i64 @load_fold_or3(i64* %p1, i64*
 ;
 ; CHECK-O3-LABEL: load_fold_or3:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rcx
 ; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    orq %rcx, %rax
+; CHECK-O3-NEXT:    orq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -1217,7 +1193,6 @@ define i64 @load_fold_xor1(i64* %p) {
   ret i64 %ret
 }
 
-; Legal, as expected
 define i64 @load_fold_xor2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_xor2:
 ; CHECK-O0:       # %bb.0:
@@ -1227,15 +1202,14 @@ define i64 @load_fold_xor2(i64* %p, i64
 ;
 ; CHECK-O3-LABEL: load_fold_xor2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    xorq %rsi, %rax
+; CHECK-O3-NEXT:    movq %rsi, %rax
+; CHECK-O3-NEXT:    xorq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = xor i64 %v, %v2
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_xor3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_xor3:
 ; CHECK-O0:       # %bb.0:
@@ -1246,9 +1220,8 @@ define i64 @load_fold_xor3(i64* %p1, i64
 ;
 ; CHECK-O3-LABEL: load_fold_xor3:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rcx
 ; CHECK-O3-NEXT:    movq (%rsi), %rax
-; CHECK-O3-NEXT:    xorq %rcx, %rax
+; CHECK-O3-NEXT:    xorq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -1256,7 +1229,6 @@ define i64 @load_fold_xor3(i64* %p1, i64
   ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i1 @load_fold_icmp1(i64* %p) {
 ; CHECK-O0-LABEL: load_fold_icmp1:
 ; CHECK-O0:       # %bb.0:
@@ -1268,8 +1240,7 @@ define i1 @load_fold_icmp1(i64* %p) {
 ;
 ; CHECK-O3-LABEL: load_fold_icmp1:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    cmpq $15, %rax
+; CHECK-O3-NEXT:    cmpq $15, (%rdi)
 ; CHECK-O3-NEXT:    sete %al
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
@@ -1277,7 +1248,6 @@ define i1 @load_fold_icmp1(i64* %p) {
   ret i1 %ret
 }
 
-; Legal to fold (TODO)
 define i1 @load_fold_icmp2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_icmp2:
 ; CHECK-O0:       # %bb.0:
@@ -1289,8 +1259,7 @@ define i1 @load_fold_icmp2(i64* %p, i64
 ;
 ; CHECK-O3-LABEL: load_fold_icmp2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    cmpq %rsi, %rax
+; CHECK-O3-NEXT:    cmpq %rsi, (%rdi)
 ; CHECK-O3-NEXT:    sete %al
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
@@ -1298,7 +1267,6 @@ define i1 @load_fold_icmp2(i64* %p, i64
   ret i1 %ret
 }
 
-; Legal to fold (TODO)
 define i1 @load_fold_icmp3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_icmp3:
 ; CHECK-O0:       # %bb.0:
@@ -1311,9 +1279,8 @@ define i1 @load_fold_icmp3(i64* %p1, i64
 ;
 ; CHECK-O3-LABEL: load_fold_icmp3:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    movq (%rsi), %rcx
-; CHECK-O3-NEXT:    cmpq %rcx, %rax
+; CHECK-O3-NEXT:    movq (%rsi), %rax
+; CHECK-O3-NEXT:    cmpq %rax, (%rdi)
 ; CHECK-O3-NEXT:    sete %al
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
@@ -1441,9 +1408,8 @@ define void @rmw_fold_mul2(i64* %p, i64
 ;
 ; CHECK-O3-LABEL: rmw_fold_mul2:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    imulq %rsi, %rax
-; CHECK-O3-NEXT:    movq %rax, (%rdi)
+; CHECK-O3-NEXT:    imulq (%rdi), %rsi
+; CHECK-O3-NEXT:    movq %rsi, (%rdi)
 ; CHECK-O3-NEXT:    retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = mul i64 %prev, %v
@@ -2070,9 +2036,8 @@ define i64 @load_forwarding(i64* %p) {
 ;
 ; CHECK-O3-LABEL: load_forwarding:
 ; CHECK-O3:       # %bb.0:
-; CHECK-O3-NEXT:    movq (%rdi), %rcx
 ; CHECK-O3-NEXT:    movq (%rdi), %rax
-; CHECK-O3-NEXT:    orq %rcx, %rax
+; CHECK-O3-NEXT:    orq (%rdi), %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %v2 = load atomic i64, i64* %p unordered, align 8




More information about the llvm-commits mailing list