[llvm] 14d4cdd - [X86] Don't zero out %eax if both %al and %ah are used
    Bill Wendling via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Tue Dec 13 15:08:56 PST 2022
    
    
  
Author: Bill Wendling
Date: 2022-12-13T15:06:53-08:00
New Revision: 14d4cddc5506fb0fd3c4ac556b4edd970aa151eb
URL: https://github.com/llvm/llvm-project/commit/14d4cddc5506fb0fd3c4ac556b4edd970aa151eb
DIFF: https://github.com/llvm/llvm-project/commit/14d4cddc5506fb0fd3c4ac556b4edd970aa151eb.diff
LOG: [X86] Don't zero out %eax if both %al and %ah are used
The iterator over super and sub registers doesn't include both 8-bit
registers in its list. So if both registers are used and only one of
them is live on return, then we need to make sure that the other 8-bit
register is also marked as live and not zeroed out.
Reviewed By: nickdesaulniers
Differential Revision: https://reviews.llvm.org/D139679
Added: 
    llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll
Modified: 
    llvm/lib/CodeGen/PrologEpilogInserter.cpp
Removed: 
    
################################################################################
diff  --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index dbfd8b936da5..4e32c1049f0a 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -1282,7 +1282,13 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
         if (!MO.isReg())
           continue;
 
-        for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(MO.getReg()))
+        MCRegister Reg = MO.getReg();
+
+        // This picks up sibling registers (e.g. %al -> %ah).
+        for (MCRegUnitIterator Unit(Reg, &TRI); Unit.isValid(); ++Unit)
+          RegsToZero.reset(*Unit);
+
+        for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg))
           RegsToZero.reset(SReg);
       }
     }
diff  --git a/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll b/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll
new file mode 100644
index 000000000000..33e501ca8503
--- /dev/null
+++ b/llvm/test/CodeGen/X86/zero-call-used-regs-i386.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -opaque-pointers | FileCheck %s --check-prefix=I386
+;
+; Make sure we don't zero out %eax when both %ah and %al are used.
+;
+; PR1766: https://github.com/ClangBuiltLinux/linux/issues/1766
+
+%struct.maple_subtree_state = type { ptr }
+
+@mas_data_end_type = dso_local local_unnamed_addr global i32 0, align 4
+@ma_meta_end_mn_0_0_0_0_0_0 = dso_local local_unnamed_addr global i8 0, align 1
+@mt_pivots_0 = dso_local local_unnamed_addr global i8 0, align 1
+@mas_data_end___trans_tmp_2 = dso_local local_unnamed_addr global ptr null, align 4
+@mt_slots_0 = dso_local local_unnamed_addr global i8 0, align 1
+
+define dso_local zeroext i1 @test1(ptr nocapture noundef readonly %0) local_unnamed_addr "zero-call-used-regs"="used-gpr" nounwind {
+; I386-LABEL: test1:
+; I386:       # %bb.0:
+; I386-NEXT:    pushl %ebx
+; I386-NEXT:    subl $24, %esp
+; I386-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; I386-NEXT:    movl (%eax), %eax
+; I386-NEXT:    movzbl (%eax), %ebx
+; I386-NEXT:    calll bar
+; I386-NEXT:    testb %al, %al
+; I386-NEXT:    # implicit-def: $al
+; I386-NEXT:    # kill: killed $al
+; I386-NEXT:    je .LBB0_6
+; I386-NEXT:  # %bb.1:
+; I386-NEXT:    cmpl $0, mas_data_end_type
+; I386-NEXT:    je .LBB0_3
+; I386-NEXT:  # %bb.2:
+; I386-NEXT:    movzbl ma_meta_end_mn_0_0_0_0_0_0, %eax
+; I386-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; I386-NEXT:    jmp .LBB0_6
+; I386-NEXT:  .LBB0_3:
+; I386-NEXT:    movb mt_pivots_0, %ah
+; I386-NEXT:    movb %ah, %al
+; I386-NEXT:    decb %al
+; I386-NEXT:    movl mas_data_end___trans_tmp_2, %ecx
+; I386-NEXT:    movsbl %al, %edx
+; I386-NEXT:    cmpl $0, (%ecx,%edx,4)
+; I386-NEXT:    je .LBB0_5
+; I386-NEXT:  # %bb.4:
+; I386-NEXT:    movb %al, %ah
+; I386-NEXT:  .LBB0_5:
+; I386-NEXT:    movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; I386-NEXT:  .LBB0_6:
+; I386-NEXT:    movb mt_slots_0, %bh
+; I386-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; I386-NEXT:    movl %eax, (%esp)
+; I386-NEXT:    calll baz
+; I386-NEXT:    subl $4, %esp
+; I386-NEXT:    cmpb %bh, %bl
+; I386-NEXT:    jae .LBB0_8
+; I386-NEXT:  # %bb.7:
+; I386-NEXT:    movsbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; I386-NEXT:    movl %eax, (%esp)
+; I386-NEXT:    calll gaz
+; I386-NEXT:  .LBB0_8:
+; I386-NEXT:    movb $1, %al
+; I386-NEXT:    addl $24, %esp
+; I386-NEXT:    popl %ebx
+; I386-NEXT:    xorl %ecx, %ecx
+; I386-NEXT:    xorl %edx, %edx
+; I386-NEXT:    retl
+  %2 = alloca %struct.maple_subtree_state, align 4
+  %3 = load ptr, ptr %0, align 4
+  %4 = load i8, ptr %3, align 1
+  %5 = tail call zeroext i1 @bar()
+  br i1 %5, label %6, label %20
+
+6:                                                ; preds = %1
+  %7 = load i32, ptr @mas_data_end_type, align 4
+  %8 = icmp eq i32 %7, 0
+  br i1 %8, label %11, label %9
+
+9:                                                ; preds = %6
+  %10 = load i8, ptr @ma_meta_end_mn_0_0_0_0_0_0, align 1
+  br label %20
+
+11:                                               ; preds = %6
+  %12 = load i8, ptr @mt_pivots_0, align 1
+  %13 = add i8 %12, -1
+  %14 = load ptr, ptr @mas_data_end___trans_tmp_2, align 4
+  %15 = sext i8 %13 to i32
+  %16 = getelementptr inbounds [1 x i32], ptr %14, i32 0, i32 %15
+  %17 = load i32, ptr %16, align 4
+  %18 = icmp eq i32 %17, 0
+  %19 = select i1 %18, i8 %12, i8 %13
+  br label %20
+
+20:                                               ; preds = %11, %9, %1
+  %21 = phi i8 [ undef, %1 ], [ %10, %9 ], [ %19, %11 ]
+  %22 = load i8, ptr @mt_slots_0, align 1
+  call void @baz(ptr nonnull sret(%struct.maple_subtree_state) align 4 %2)
+  %23 = icmp ult i8 %4, %22
+  br i1 %23, label %24, label %25
+
+24:                                               ; preds = %20
+  call void @gaz(i8 noundef signext %21)
+  br label %25
+
+25:                                               ; preds = %20, %24
+  ret i1 true
+}
+
+declare dso_local zeroext i1 @bar(...) local_unnamed_addr
+
+declare dso_local void @baz(ptr sret(%struct.maple_subtree_state) align 4, ...) local_unnamed_addr
+
+declare dso_local void @gaz(i8 noundef signext) local_unnamed_addr
        
    
    
More information about the llvm-commits
mailing list