[llvm] 5e28d30 - [XCOFF][AIX] Peephole optimization for toc-data.

Sean Fertile via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 13 17:41:17 PDT 2023


Author: Sean Fertile
Date: 2023-07-13T20:40:09-04:00
New Revision: 5e28d30f1fb10faf2db2f8bf0502e7fd72e6ac2e

URL: https://github.com/llvm/llvm-project/commit/5e28d30f1fb10faf2db2f8bf0502e7fd72e6ac2e
DIFF: https://github.com/llvm/llvm-project/commit/5e28d30f1fb10faf2db2f8bf0502e7fd72e6ac2e.diff

LOG: [XCOFF][AIX] Peephole optimization for toc-data.

Follow-up to D101178: a peephole optimization that converts a
load-address instruction and a consuming load/store into just the
load/store when it's safe to do so.

e.g., converts the two-instruction code sequence
  la 4, i[TD](2)
  stw 3, 0(4)
to
  stw 3, i[TD](2)

Differential Revision: https://reviews.llvm.org/D101470

Added: 
    llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
    llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
    llvm/test/CodeGen/PowerPC/toc-data-const.ll
    llvm/test/CodeGen/PowerPC/toc-data.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 96fd83ab6a7bc8..0ebfc007b3d7d7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -7633,6 +7633,20 @@ void PPCDAGToDAGISel::PeepholePPC64() {
     case PPC::ADDItocL:
       Flags = PPCII::MO_TOC_LO;
       break;
+    case PPC::ADDItoc:
+    case PPC::ADDItoc8:
+      if (RequiresMod4Offset) {
+        if (GlobalAddressSDNode *GA =
+                dyn_cast<GlobalAddressSDNode>(Base.getOperand(0))) {
+          const GlobalValue *GV = GA->getGlobal();
+          Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
+          // Skip the peephole: an underaligned XMC_TD global must not be
+          // accessed with a DS-form instruction.
+          if (Alignment < 4)
+            continue;
+        }
+      }
+      break;
     }
 
     SDValue ImmOpnd = Base.getOperand(1);
@@ -7727,12 +7741,27 @@ void PPCDAGToDAGISel::PeepholePPC64() {
       }
     }
 
+    const unsigned BaseOpcode = Base.getMachineOpcode();
+    // ADDItoc and ADDItoc8 are pseudos used exclusively by AIX small code
+    // model when a global is defined in the TOC.
+    const bool OpcodeIsAIXTocData =
+        BaseOpcode == PPC::ADDItoc || BaseOpcode == PPC::ADDItoc8;
+
     if (FirstOp == 1) // Store
-      (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
-                                       Base.getOperand(0), N->getOperand(3));
+      if (OpcodeIsAIXTocData)
+        (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0),
+                                         Base.getOperand(0), Base.getOperand(1),
+                                         N->getOperand(3));
+      else
+        (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
+                                         Base.getOperand(0), N->getOperand(3));
     else // Load
-      (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
-                                       N->getOperand(2));
+      if (OpcodeIsAIXTocData)
+        (void)CurDAG->UpdateNodeOperands(N, Base.getOperand(0),
+                                         Base.getOperand(1), N->getOperand(2));
+      else
+        (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
+                                         N->getOperand(2));
 
     if (UpdateHBase)
       (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),

diff  --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 976effb96adc0d..1f7dba66db3558 100644
--- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -42,6 +42,10 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
     Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
   } else {
     const GlobalValue *GV = MO.getGlobal();
+    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+      if (GVar->hasAttribute("toc-data"))
+        return TM.getSymbol(GV);
+
     TM.getNameWithPrefix(Name, GV, Mang);
   }
 

diff  --git a/llvm/test/CodeGen/PowerPC/toc-data-const.ll b/llvm/test/CodeGen/PowerPC/toc-data-const.ll
index 740032e26a432e..5ec19cbbff549e 100644
--- a/llvm/test/CodeGen/PowerPC/toc-data-const.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-data-const.ll
@@ -1,5 +1,8 @@
-; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s --check-prefix CHECK
-; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s --check-prefix CHECK
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -O0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK32,NOOPT
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -O0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK64,NOOPT
+
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s --check-prefixes=CHECK,CHECK32,OPT
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s --check-prefixes=CHECK,CHECK64,OPT
 
 ; RUN: llc -filetype=obj -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s -o %t32.o
 ; RUN: llvm-readobj %t32.o --syms --relocs | FileCheck %s --check-prefix=OBJ32
@@ -23,27 +26,31 @@ define ptr @retptr() {
 
 attributes #0 = { "toc-data" }
 
-; CHECK:       .read:
-; CHECK:        la 3, i1[TD](2)
+; CHECK-LABEL: .read:
+; NOOPT:         la 3, i1[TD](2)
+; NOOPT:         lwz 3, 0(3)
+; OPT:           lwz 3, i1[TD](2)
 
-; CHECK:       .retptr:
-; CHECK:        la 3, i2[TD](2)
+; CHECK-LABEL: .retptr:
+; CHECK:         la 3, i2[TD](2)
+; CHECK-NEXT:    blr
 
 ; CHECK-DAG:   .toc
 ; CHECK:         .extern i1[TD]
-; CHECK:         .csect i2[TD]
+; CHECK32:       .csect i2[TD],2
+; CHECK64:       .csect i2[TD],3
 
 ; OBJ32:      Relocations [
 ; OBJ32-NEXT:   Section (index: 1) .text {
 ; OBJ32-NEXT:     0x2 R_TOC i1(1) 0xF
-; OBJ32-NEXT:     0x26 R_TOC i2(15) 0xF
+; OBJ32-NEXT:     0x22 R_TOC i2(15) 0xF
 ; OBJ32-NEXT:   }
 ; OBJ32-NEXT:   Section (index: 2) .data {
-; OBJ32-NEXT:     0x44 R_POS .read(5) 0x1F
-; OBJ32-NEXT:     0x48 R_POS TOC(13) 0x1F
-; OBJ32-NEXT:     0x50 R_POS .retptr(7) 0x1F
-; OBJ32-NEXT:     0x54 R_POS TOC(13) 0x1F
-; OBJ32-NEXT:     0x5C R_POS i1(1) 0x1F
+; OBJ32-NEXT:     0x40 R_POS .read(5) 0x1F
+; OBJ32-NEXT:     0x44 R_POS TOC(13) 0x1F
+; OBJ32-NEXT:     0x4C R_POS .retptr(7) 0x1F
+; OBJ32-NEXT:     0x50 R_POS TOC(13) 0x1F
+; OBJ32-NEXT:     0x58 R_POS i1(1) 0x1F
 ; OBJ32-NEXT:   }
 ; OBJ32-NEXT: ]
 
@@ -70,7 +77,7 @@ attributes #0 = { "toc-data" }
 ; OBJ32:      Symbol {
 ; OBJ32:        Index: 13
 ; OBJ32-NEXT:   Name: TOC
-; OBJ32-NEXT:   Value (RelocatableAddress): 0x5C
+; OBJ32-NEXT:   Value (RelocatableAddress): 0x58
 ; OBJ32-NEXT:   Section: .data
 ; OBJ32-NEXT:   Type: 0x0
 ; OBJ32-NEXT:   StorageClass: C_HIDEXT (0x6B)
@@ -90,7 +97,7 @@ attributes #0 = { "toc-data" }
 ; OBJ32:      Symbol {
 ; OBJ32:        Index: 15
 ; OBJ32-NEXT:   Name: i2
-; OBJ32-NEXT:   Value (RelocatableAddress): 0x5C
+; OBJ32-NEXT:   Value (RelocatableAddress): 0x58
 ; OBJ32-NEXT:   Section: .data
 ; OBJ32-NEXT:   Type: 0x0
 ; OBJ32-NEXT:   StorageClass: C_EXT (0x2)
@@ -111,14 +118,14 @@ attributes #0 = { "toc-data" }
 ; OBJ64:      Relocations [
 ; OBJ64-NEXT:   Section (index: 1) .text {
 ; OBJ64-NEXT:     0x2 R_TOC i1(1) 0xF
-; OBJ64-NEXT:     0x26 R_TOC i2(15) 0xF
+; OBJ64-NEXT:     0x22 R_TOC i2(15) 0xF
 ; OBJ64-NEXT:   }
 ; OBJ64-NEXT:   Section (index: 2) .data {
-; OBJ64-NEXT:     0x48 R_POS .read(5) 0x3F
-; OBJ64-NEXT:     0x50 R_POS TOC(13) 0x3F
-; OBJ64-NEXT:     0x60 R_POS .retptr(7) 0x3F
-; OBJ64-NEXT:     0x68 R_POS TOC(13) 0x3F
-; OBJ64-NEXT:     0x78 R_POS i1(1) 0x3F
+; OBJ64-NEXT:     0x40 R_POS .read(5) 0x3F
+; OBJ64-NEXT:     0x48 R_POS TOC(13) 0x3F
+; OBJ64-NEXT:     0x58 R_POS .retptr(7) 0x3F
+; OBJ64-NEXT:     0x60 R_POS TOC(13) 0x3F
+; OBJ64-NEXT:     0x70 R_POS i1(1) 0x3F
 ; OBJ64-NEXT:   }
 ; OBJ64-NEXT: ]
 
@@ -144,7 +151,7 @@ attributes #0 = { "toc-data" }
 ; OBJ64:      Symbol {
 ; OBJ64:        Index: 13
 ; OBJ64-NEXT:   Name: TOC
-; OBJ64-NEXT:   Value (RelocatableAddress): 0x78
+; OBJ64-NEXT:   Value (RelocatableAddress): 0x70
 ; OBJ64-NEXT:   Section: .data
 ; OBJ64-NEXT:   Type: 0x0
 ; OBJ64-NEXT:   StorageClass: C_HIDEXT (0x6B)
@@ -163,7 +170,7 @@ attributes #0 = { "toc-data" }
 ; OBJ64:      Symbol {
 ; OBJ64:        Index: 15
 ; OBJ64-NEXT:   Name: i2
-; OBJ64-NEXT:   Value (RelocatableAddress): 0x78
+; OBJ64-NEXT:   Value (RelocatableAddress): 0x70
 ; OBJ64-NEXT:   Section: .data
 ; OBJ64-NEXT:   Type: 0x0
 ; OBJ64-NEXT:   StorageClass: C_EXT (0x2)
@@ -181,15 +188,15 @@ attributes #0 = { "toc-data" }
 ; OBJ64-NEXT: }
 
 ; DIS32:      00000000 <.read>:
-; DIS32-NEXT:        0: 38 62 00 00  	addi 3, 2, 0
+; DIS32-NEXT:        0: 80 62 00 00   lwz 3, 0(2)
 ; DIS32-NEXT:                         00000002:  R_TOC	i1
-; DIS32:      00000024 <.retptr>:
-; DIS32-NEXT:       24: 38 62 00 00  	addi 3, 2, 0
-; DIS32-NEXT:                         00000026:  R_TOC	i2
+; DIS32:      00000020 <.retptr>:
+; DIS32-NEXT:       20: 38 62 00 00  	addi 3, 2, 0
+; DIS32-NEXT:                         00000022:  R_TOC	i2
 
 ; DIS64:      0000000000000000 <.read>:
-; DIS64-NEXT:        0: 38 62 00 00  	addi 3, 2, 0
+; DIS64-NEXT:        0: 80 62 00 00  	lwz 3, 0(2)
 ; DIS64-NEXT:                         0000000000000002:  R_TOC	i1
-; DIS64:      0000000000000024 <.retptr>:
-; DIS64-NEXT:       24: 38 62 00 00  	addi 3, 2, 0
-; DIS64-NEXT:                         0000000000000026:  R_TOC	i2
+; DIS64:      0000000000000020 <.retptr>:
+; DIS64-NEXT:       20: 38 62 00 00  	addi 3, 2, 0
+; DIS64-NEXT:                         0000000000000022:  R_TOC	i2

diff  --git a/llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll b/llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll
new file mode 100644
index 00000000000000..8ec5d9fd331750
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s
+
+@underaligned = dso_local global i32 123, align 1 #0
+
+define i64 @read() {
+entry:
+  %0  = load i32, ptr @underaligned, align 1
+  %1 = sext i32 %0 to i64
+  ret i64 %1
+}
+
+attributes #0 = { "toc-data"  }
+
+; CHECK-LABEL: .read
+; CHECK:       la [[DEF:[0-9]+]], underaligned[TD](2)
+; CHECK:       lwa {{[0-9]+}}, 0([[DEF]])

diff  --git a/llvm/test/CodeGen/PowerPC/toc-data.ll b/llvm/test/CodeGen/PowerPC/toc-data.ll
index cbf3be9fcaad05..20031de8a69912 100644
--- a/llvm/test/CodeGen/PowerPC/toc-data.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-data.ll
@@ -1,16 +1,16 @@
 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s \
-; RUN:     -stop-before=ppc-vsx-copy | FileCheck %s --check-prefix CHECK32
+; RUN:     -stop-before=ppc-vsx-copy | FileCheck %s --check-prefixes=CHECK32,OPT32
 ; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s \
 ; RUN:     -stop-before=ppc-vsx-copy | FileCheck %s --check-prefix CHECK64
-; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix TEST32
-; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix TEST64
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefixes=TEST32,ASMOPT32
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefixes=TEST64,ASMOPT64
 
 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s \
-; RUN:     -stop-before=ppc-vsx-copy -O0  | FileCheck %s --check-prefix CHECK32
+; RUN:     -stop-before=ppc-vsx-copy -O0  | FileCheck %s --check-prefixes=CHECK32,NOOPT32
 ; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s \
 ; RUN:     -stop-before=ppc-vsx-copy -O0 | FileCheck %s --check-prefix CHECK64-NOOPT
-; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST32
-; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST64
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefixes=TEST32,ASMNOOPT32
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefixes=TEST64,ASMNOOPT64
 
 @i = dso_local global i32 0, align 4 #0
 @d = dso_local local_unnamed_addr global double 3.141590e+00, align 8
@@ -24,16 +24,17 @@ define dso_local void @write_int(i32 signext %in) {
     ret void
 }
 ; CHECK32: name:            write_int
-; CHECK32:      %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @i, $r2
-; CHECK32-NEXT: STW %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i)
+; NOOPT32:      %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @i, $r2
+; NOOPT32-NEXT: STW %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i)
+; OPT32:        STW %{{[0-9]+}}, @i, $r2 :: (store (s32) into @i)
 
 ; TEST32:         .write_int:
-; TEST32:           la 4, i[TD](2)
-; TEST32-NEXT:      stw 3, 0(4)
+; ASMNOOPT32:       la 4, i[TD](2)
+; ASMNOOPT32-NEXT:  stw 3, 0(4)
+; ASMOPT32:         stw 3, i[TD](2)
 
 ; CHECK64: name:            write_int
-; CHECK64:      %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @i, $x2
-; CHECK64-NEXT: STW8 %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i)
+; CHECK64:      STW8 %{{[0-9]+}}, @i, $x2  :: (store (s32) into @i)
 
 ; CHECK64-NOOPT:  name: write_int
 ; CHECK64-NOOPT:    %[[SUBREG:[0-9]+]]:gprc = COPY %{{[0-9]}}.sub_32
@@ -41,9 +42,9 @@ define dso_local void @write_int(i32 signext %in) {
 ; CHECK64-NOOPT:    STW %[[SUBREG]], 0, killed %[[ADDR]] :: (store (s32) into @i)
 
 ; TEST64:         .write_int:
-; TEST64:           la 4, i[TD](2)
-; TEST64-NEXT:      stw 3, 0(4)
-
+; ASMNOOPT64:       la 4, i[TD](2)
+; ASMNOOPT64-NEXT:  stw 3, 0(4)
+; ASMOPT64:         stw 3, i[TD](2)
 
 define dso_local i64 @read_ll() {
   entry:
@@ -77,25 +78,26 @@ define dso_local float @read_float() {
     ret float %0
 }
 ; CHECK32: name:            read_float
-; CHECK32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @f, $r2
-; CHECK32: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f)
+; NOOPT32: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @f, $r2
+; NOOPT32: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f)
+; OPT32:   %{{[0-9]+}}:f4rc = LFS @f, $r2 :: (dereferenceable load (s32) from @f)
 
-; TEST32:       .read_float:
-; TEST32:         la 3, f[TD](2)
-; TEST32-NEXT:    lfs 1, 0(3)
+; TEST32:           .read_float:
+; ASMNOOPT32:         la 3, f[TD](2)
+; ASMNOOPT32-NEXT:    lfs 1, 0(3)
+; ASMOPT32:           lfs 1, f[TD](2)
 
 ; CHECK64: name:            read_float
-; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2
-; CHECK64: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f)
+; CHECK64: %{{[0-9]+}}:f4rc = LFS @f, $x2 :: (dereferenceable load (s32) from @f)
 
 ; CHECK64-NOOPT: name:            read_float
 ; CHECK64-NOOPT:   %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2
 ; CHECK64-NOOPT:   %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]]
 
-; TEST64:       .read_float:
-; TEST64:         la 3, f[TD](2)
-; TEST64-NEXT:    lfs 1, 0(3)
-
+; TEST64:          .read_float:
+; ASMNOOPT64:        la 3, f[TD](2)
+; ASMNOOPT64-NEXT:   lfs 1, 0(3)
+; ASMOPT64:          lfs 1, f[TD](2)
 
 define dso_local void @write_double(double %in) {
   entry:


        


More information about the llvm-commits mailing list