[llvm] 6f6d2d2 - [x86] refine conditions for immediate hoisting to save code-size

Tue Jun 9 12:45:03 PDT 2020

Author: Sanjay Patel
Date: 2020-06-09T15:44:55-04:00
New Revision: 6f6d2d238360883039cd17986c9ef598d04995a3

URL: https://github.com/llvm/llvm-project/commit/6f6d2d238360883039cd17986c9ef598d04995a3
DIFF: https://github.com/llvm/llvm-project/commit/6f6d2d238360883039cd17986c9ef598d04995a3.diff

LOG: [x86] refine conditions for immediate hoisting to save code-size

As shown in PR46237:
https://bugs.llvm.org/show_bug.cgi?id=46237

The size-savings win for hoisting an 8-bit ALU immediate (intentionally
excluding store constants) requires extreme conditions; it may not even
be possible when including REX prefix bytes on x86-64.

I did draft a version of this patch that included use counts after the
loop, but I suspect that accounting is not working as expected. I think
that is because the number of constant uses are changing as we select
instructions (for example as we transform shl/add into LEA).

Differential Revision: https://reviews.llvm.org/D81468

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/test/CodeGen/X86/immediate_merging.ll
    llvm/test/CodeGen/X86/immediate_merging64.ll
    llvm/test/CodeGen/X86/pr27202.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 2171be293914..fadcb173cd4b 100644

--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -364,9 +364,10 @@ namespace {
         if (User->getNumOperands() != 2)
           continue;
 
-        // If this can match to INC/DEC, don't count it as a use.
-        if (User->getOpcode() == ISD::ADD &&
-            (isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0))))
+        // If this is a sign-extended 8-bit integer immediate used in an ALU
+        // instruction, there is probably an opcode encoding to save space.
+        auto *C = dyn_cast<ConstantSDNode>(N);
+        if (C && isInt<8>(C->getSExtValue()))
           continue;
 
         // Immediates that are used for offsets as part of stack

diff  --git a/llvm/test/CodeGen/X86/immediate_merging.ll b/llvm/test/CodeGen/X86/immediate_merging.ll
index 1bed1014f94e..038c56f6dd5d 100644
--- a/llvm/test/CodeGen/X86/immediate_merging.ll
+++ b/llvm/test/CodeGen/X86/immediate_merging.ll
@@ -12,16 +12,16 @@
 @i = common global i32 0, align 4
 
 ; Test -Os to make sure immediates with multiple users don't get pulled in to
-; instructions.
+; instructions (8-bit immediates are exceptions).
+
 define i32 @foo() optsize {
 ; X86-LABEL: foo:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl $1234, %eax # imm = 0x4D2
 ; X86-NEXT:    movl %eax, a
 ; X86-NEXT:    movl %eax, b
-; X86-NEXT:    movl $12, %eax
-; X86-NEXT:    movl %eax, c
-; X86-NEXT:    cmpl %eax, e
+; X86-NEXT:    movl $12, c
+; X86-NEXT:    cmpl $12, e
 ; X86-NEXT:    jne .LBB0_2
 ; X86-NEXT:  # %bb.1: # %if.then
 ; X86-NEXT:    movl $1, x
@@ -38,9 +38,8 @@ define i32 @foo() optsize {
 ; X64-NEXT:    movl $1234, %eax # imm = 0x4D2
 ; X64-NEXT:    movl %eax, {{.*}}(%rip)
 ; X64-NEXT:    movl %eax, {{.*}}(%rip)
-; X64-NEXT:    movl $12, %eax
-; X64-NEXT:    movl %eax, {{.*}}(%rip)
-; X64-NEXT:    cmpl %eax, {{.*}}(%rip)
+; X64-NEXT:    movl $12, {{.*}}(%rip)
+; X64-NEXT:    cmpl $12, {{.*}}(%rip)
 ; X64-NEXT:    jne .LBB0_2
 ; X64-NEXT:  # %bb.1: # %if.then
 ; X64-NEXT:    movl $1, {{.*}}(%rip)
@@ -74,16 +73,16 @@ if.end:                                           ; preds = %if.then, %entry
 }
 
 ; Test PGSO to make sure immediates with multiple users don't get pulled in to
-; instructions.
+; instructions (8-bit immediates are exceptions).
+
 define i32 @foo_pgso() !prof !14 {
 ; X86-LABEL: foo_pgso:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl $1234, %eax # imm = 0x4D2
 ; X86-NEXT:    movl %eax, a
 ; X86-NEXT:    movl %eax, b
-; X86-NEXT:    movl $12, %eax
-; X86-NEXT:    movl %eax, c
-; X86-NEXT:    cmpl %eax, e
+; X86-NEXT:    movl $12, c
+; X86-NEXT:    cmpl $12, e
 ; X86-NEXT:    jne .LBB1_2
 ; X86-NEXT:  # %bb.1: # %if.then
 ; X86-NEXT:    movl $1, x
@@ -100,9 +99,8 @@ define i32 @foo_pgso() !prof !14 {
 ; X64-NEXT:    movl $1234, %eax # imm = 0x4D2
 ; X64-NEXT:    movl %eax, {{.*}}(%rip)
 ; X64-NEXT:    movl %eax, {{.*}}(%rip)
-; X64-NEXT:    movl $12, %eax
-; X64-NEXT:    movl %eax, {{.*}}(%rip)
-; X64-NEXT:    cmpl %eax, {{.*}}(%rip)
+; X64-NEXT:    movl $12, {{.*}}(%rip)
+; X64-NEXT:    cmpl $12, {{.*}}(%rip)
 ; X64-NEXT:    jne .LBB1_2
 ; X64-NEXT:  # %bb.1: # %if.then
 ; X64-NEXT:    movl $1, {{.*}}(%rip)

diff  --git a/llvm/test/CodeGen/X86/immediate_merging64.ll b/llvm/test/CodeGen/X86/immediate_merging64.ll
index a807a119e893..d355bea1603a 100644
--- a/llvm/test/CodeGen/X86/immediate_merging64.ll
+++ b/llvm/test/CodeGen/X86/immediate_merging64.ll
@@ -5,13 +5,13 @@
 ; 32-bit immediates are merged for code size savings.
 
 ; Immediates with multiple users should not be pulled into instructions when
-; optimizing for code size.
+; optimizing for code size (but 8-bit immediates are exceptions).
+
 define i1 @imm_multiple_users(i64 %a, i64* %b) optsize {
 ; CHECK-LABEL: imm_multiple_users:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq $-1, %rax
-; CHECK-NEXT:    movq %rax, (%rsi)
-; CHECK-NEXT:    cmpq %rax, %rdi
+; CHECK-NEXT:    movq $-1, (%rsi)
+; CHECK-NEXT:    cmpq $-1, %rdi
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   store i64 -1, i64* %b, align 8
@@ -22,9 +22,8 @@ define i1 @imm_multiple_users(i64 %a, i64* %b) optsize {
 define i1 @imm_multiple_users_pgso(i64 %a, i64* %b) !prof !14 {
 ; CHECK-LABEL: imm_multiple_users_pgso:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq $-1, %rax
-; CHECK-NEXT:    movq %rax, (%rsi)
-; CHECK-NEXT:    cmpq %rax, %rdi
+; CHECK-NEXT:    movq $-1, (%rsi)
+; CHECK-NEXT:    cmpq $-1, %rdi
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   store i64 -1, i64* %b, align 8

diff  --git a/llvm/test/CodeGen/X86/pr27202.ll b/llvm/test/CodeGen/X86/pr27202.ll
index ea5781ed8c5f..bb6be1d1685d 100644
--- a/llvm/test/CodeGen/X86/pr27202.ll
+++ b/llvm/test/CodeGen/X86/pr27202.ll
@@ -14,12 +14,14 @@ define i1 @foo(i32 %i) optsize {
   ret i1 %cmp
 }
 
+; 8-bit ALU immediates probably have small encodings.
+; We do not want to hoist the constant into a register here.
+
 define zeroext i1 @g(i32 %x) optsize {
 ; CHECK-LABEL: g:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    orl %eax, %edi
-; CHECK-NEXT:    cmpl %eax, %edi
+; CHECK-NEXT:    orl $1, %edi
+; CHECK-NEXT:    cmpl $1, %edi
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %t0 = or i32 %x, 1
@@ -27,7 +29,7 @@ define zeroext i1 @g(i32 %x) optsize {
   ret i1 %t1
 }
 
-; 8-bit immediates probably have small encodings.
+; 8-bit ALU immediates probably have small encodings.
 ; We do not want to hoist the constant into a register here.
 
 define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize {
@@ -36,9 +38,8 @@ define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize {
 ; CHECK-NEXT:    movl %edx, %eax
 ; CHECK-NEXT:    shll $6, %eax
 ; CHECK-NEXT:    movzbl %al, %ecx
-; CHECK-NEXT:    movl $7, %eax
-; CHECK-NEXT:    andq %rax, %rsi
-; CHECK-NEXT:    andq %rax, %rdx
+; CHECK-NEXT:    andl $7, %esi
+; CHECK-NEXT:    andl $7, %edx
 ; CHECK-NEXT:    leaq (%rdx,%rsi,8), %rax
 ; CHECK-NEXT:    orq %rcx, %rax
 ; CHECK-NEXT:    retq