[llvm] PowerPC32:PIC: Update to bcl to fix branch prediction mis-predict issue (PR #134140)

Lei Huang via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 4 13:45:55 PDT 2025


https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/134140

>From 6d0cb30449ecf6304ccd3b7eddf67366a826eecf Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 2 Apr 2025 14:54:52 -0400
Subject: [PATCH 1/4] PowerPC32:PIC: Update to bcl to fix branch prediction
 mis-predict issue

Update `bl` to `bcl 20, 31, .+4` for 32-bit PIC code generation
so that we do not corrupt the link stack and cause the branch
predictor to mispredict.

fixes: https://github.com/llvm/llvm-project/issues/128644
---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp    |  8 +++---
 llvm/test/CodeGen/PowerPC/mcm-5.ll           |  4 +--
 llvm/test/CodeGen/PowerPC/ppc32-pic-bcl.ll   | 29 ++++++++++++++++++++
 llvm/test/CodeGen/PowerPC/ppc32-pic-large.ll |  2 +-
 llvm/test/CodeGen/PowerPC/ppc32-pic.ll       |  4 +--
 5 files changed, 38 insertions(+), 9 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/ppc32-pic-bcl.ll

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index ba6653e10bdc5..0aca8ec7567af 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -955,8 +955,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
             OutContext),
         MCConstantExpr::create(4, OutContext), OutContext);
 
-    // Emit the 'bl'.
-    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL).addExpr(OffsExpr));
+    // Emit 'bcl 20,31,.+4' so the link stack is not corrupted.
+    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(OffsExpr));
     return;
   }
   case PPC::MovePCtoLR:
@@ -967,9 +967,9 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
     // L1$pb:
     MCSymbol *PICBase = MF->getPICBaseSymbol();
 
-    // Emit the 'bl'.
+    // Emit 'bcl 20,31,.+4' so the link stack is not corrupted.
     EmitToStreamer(*OutStreamer,
-                   MCInstBuilder(PPC::BL)
+                   MCInstBuilder(PPC::BCLalways)
                        // FIXME: We would like an efficient form for this, so we
                        // don't have to do a lot of extra uniquing.
                        .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
diff --git a/llvm/test/CodeGen/PowerPC/mcm-5.ll b/llvm/test/CodeGen/PowerPC/mcm-5.ll
index b88f405211b05..f9629b5710d72 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-5.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-5.ll
@@ -51,7 +51,7 @@ sw.epilog:                                        ; preds = %sw.bb3, %sw.default
   ret i32 %5
 }
 ; CHECK-LABEL: test_jump_table:
-; CHECK-NOT:       bl .L0$pb
+; CHECK-NOT:       bcl 20, 31, .L0$pb
 
 ; CHECK:       addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
 ; CHECK:       ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
@@ -64,7 +64,7 @@ sw.epilog:                                        ; preds = %sw.bb3, %sw.default
 ; CHECK-NEXT: .long	.LBB0_{{[0-9]+}}-.LJTI0_0
 
 ; LARGE-LABEL: test_jump_table:
-; LARGE:       bl .L0$pb
+; LARGE:       bcl 20, 31, .L0$pb
 ; LARGE-NEXT:  .L0$pb:
 ; LARGE:       mflr [[REGBASE:[0-9]+]]
 
diff --git a/llvm/test/CodeGen/PowerPC/ppc32-pic-bcl.ll b/llvm/test/CodeGen/PowerPC/ppc32-pic-bcl.ll
new file mode 100644
index 0000000000000..1e938b135fbfd
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc32-pic-bcl.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=powerpc -relocation-model=pic | \
+; RUN:    FileCheck -check-prefixes=SMALL %s
+
+@val = global i8 0, align 1
+
+define zeroext i8 @testbcl() nounwind {
+; SMALL-LABEL: testbcl:
+; SMALL:       # %bb.0: # %entry
+; SMALL-NEXT:    mflr 0
+; SMALL-NEXT:    stwu 1, -16(1)
+; SMALL-NEXT:    stw 30, 8(1)
+; SMALL-NEXT:    stw 0, 20(1)
+; SMALL-NEXT:    bcl 20, 31, .L0$pb
+; SMALL-NEXT:  .L0$pb:
+; SMALL-NEXT:    mflr 30
+; SMALL-NEXT:    lwz 3, .L0$poff-.L0$pb(30)
+; SMALL-NEXT:    add 30, 3, 30
+; SMALL-NEXT:    lwz 3, .LC0-.LTOC(30)
+; SMALL-NEXT:    lbz 3, 0(3)
+; SMALL-NEXT:    lwz 0, 20(1)
+; SMALL-NEXT:    lwz 30, 8(1)
+; SMALL-NEXT:    addi 1, 1, 16
+; SMALL-NEXT:    mtlr 0
+; SMALL-NEXT:    blr
+entry:
+  %0 = load i8, ptr @val, align 1
+  ret i8 %0
+}
diff --git a/llvm/test/CodeGen/PowerPC/ppc32-pic-large.ll b/llvm/test/CodeGen/PowerPC/ppc32-pic-large.ll
index 2f0b92964c13b..7be1a80b7af43 100644
--- a/llvm/test/CodeGen/PowerPC/ppc32-pic-large.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc32-pic-large.ll
@@ -55,7 +55,7 @@ entry:
 ; LARGE-BSS-NEXT:  foo:
 ; LARGE-BSS:         stwu 1, -32(1)
 ; LARGE-BSS:         stw 30, 24(1)
-; LARGE-BSS:         bl [[PB]]
+; LARGE-BSS:         bcl 20, 31, [[PB]]
 ; LARGE-BSS-NEXT:  [[PB]]:
 ; LARGE-BSS:         mflr 30
 ; LARGE-BSS:         lwz [[REG:[0-9]+]], [[POFF]]-[[PB]](30)
diff --git a/llvm/test/CodeGen/PowerPC/ppc32-pic.ll b/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
index aed994144940c..9ffd7395ffeb5 100644
--- a/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
@@ -18,8 +18,8 @@ entry:
 ; SMALL-LABEL: foo:
 ; SMALL:         stwu 1, -32(1)
 ; SMALL:         stw 30, 24(1)
-; SMALL-BSS:     bl _GLOBAL_OFFSET_TABLE_@local-4
-; SMALL-SECURE:  bl .L0$pb
+; SMALL-BSS:     bcl 20, 31, _GLOBAL_OFFSET_TABLE_@local-4
+; SMALL-SECURE:  bcl 20, 31, .L0$pb
 ; SMALL:         mflr 30
 ; SMALL-SECURE:  addis 30, 30, _GLOBAL_OFFSET_TABLE_-.L0$pb@ha
 ; SMALL-SECURE:  addi 30, 30, _GLOBAL_OFFSET_TABLE_-.L0$pb@l

>From 58906f473af33e87a2d297d443a138ad343c5532 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 2 Apr 2025 15:13:00 -0400
Subject: [PATCH 2/4] run clang-format

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 0aca8ec7567af..7ff91238f8608 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -956,7 +956,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
         MCConstantExpr::create(4, OutContext), OutContext);
 
     // Emit 'bcl 20,31,.+4' so the link stack is not corrupted.
-    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(OffsExpr));
+    EmitToStreamer(*OutStreamer,
+                   MCInstBuilder(PPC::BCLalways).addExpr(OffsExpr));
     return;
   }
   case PPC::MovePCtoLR:

>From eb0e07d7ef1bbfb4b3902d962a39f79669fd9c14 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Fri, 4 Apr 2025 16:22:47 -0400
Subject: [PATCH 3/4] Remove update for PPC::MoveGOTtoLR pseudo

---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 5 ++---
 llvm/test/CodeGen/PowerPC/ppc32-pic.ll    | 4 ++--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 7ff91238f8608..f07331bf6c6b5 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -955,9 +955,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
             OutContext),
         MCConstantExpr::create(4, OutContext), OutContext);
 
-    // Emit 'bcl 20,31,.+4' so the link stack is not corrupted.
-    EmitToStreamer(*OutStreamer,
-                   MCInstBuilder(PPC::BCLalways).addExpr(OffsExpr));
+    // Emit the 'bl'.
+    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL).addExpr(OffsExpr));
     return;
   }
   case PPC::MovePCtoLR:
diff --git a/llvm/test/CodeGen/PowerPC/ppc32-pic.ll b/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
index 9ffd7395ffeb5..aed994144940c 100644
--- a/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
@@ -18,8 +18,8 @@ entry:
 ; SMALL-LABEL: foo:
 ; SMALL:         stwu 1, -32(1)
 ; SMALL:         stw 30, 24(1)
-; SMALL-BSS:     bcl 20, 31, _GLOBAL_OFFSET_TABLE_@local-4
-; SMALL-SECURE:  bcl 20, 31, .L0$pb
+; SMALL-BSS:     bl _GLOBAL_OFFSET_TABLE_@local-4
+; SMALL-SECURE:  bl .L0$pb
 ; SMALL:         mflr 30
 ; SMALL-SECURE:  addis 30, 30, _GLOBAL_OFFSET_TABLE_-.L0$pb@ha
 ; SMALL-SECURE:  addi 30, 30, _GLOBAL_OFFSET_TABLE_-.L0$pb@l

>From aa188a35d3696a15fc0b4201e35dc49304a33af8 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Fri, 4 Apr 2025 16:45:41 -0400
Subject: [PATCH 4/4] Restore check for conversion of bl .+4

---
 llvm/test/CodeGen/PowerPC/ppc32-pic.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/PowerPC/ppc32-pic.ll b/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
index aed994144940c..f7d8df9133306 100644
--- a/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
@@ -19,7 +19,7 @@ entry:
 ; SMALL:         stwu 1, -32(1)
 ; SMALL:         stw 30, 24(1)
 ; SMALL-BSS:     bl _GLOBAL_OFFSET_TABLE_@local-4
-; SMALL-SECURE:  bl .L0$pb
+; SMALL-SECURE:  bcl 20, 31, .L0$pb
 ; SMALL:         mflr 30
 ; SMALL-SECURE:  addis 30, 30, _GLOBAL_OFFSET_TABLE_-.L0$pb@ha
 ; SMALL-SECURE:  addi 30, 30, _GLOBAL_OFFSET_TABLE_-.L0$pb@l



More information about the llvm-commits mailing list