[llvm] 45ad207 - [PowerPC] Add fix to partword atomic operations

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Thu May 20 10:36:44 PDT 2021


Author: Stefan Pintilie
Date: 2021-05-20T12:36:37-05:00
New Revision: 45ad207e4585ff18ed11509991bf8512f2466818

URL: https://github.com/llvm/llvm-project/commit/45ad207e4585ff18ed11509991bf8512f2466818
DIFF: https://github.com/llvm/llvm-project/commit/45ad207e4585ff18ed11509991bf8512f2466818.diff

LOG: [PowerPC] Add fix to partword atomic operations

The results of partword (8-bit and 16-bit) atomic binary operations are not
zero-extended as they should be. This patch masks the result after the final
shift so that it is always zero-extended.

Reviewed By: nemanjai, #powerpc

Differential Revision: https://reviews.llvm.org/D102819

Added: 
    llvm/test/CodeGen/PowerPC/ppc-partword-atomic.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/atomics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e44c5f8754761..2e681a4fed30c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11225,6 +11225,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
   Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
   Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
   Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
+  Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
   Register Ptr1Reg;
   Register TmpReg =
       (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
@@ -11252,7 +11253,8 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
   //   stwcx. tmp4, ptr
   //   bne- loopMBB
   //   fallthrough --> exitMBB
-  //   srw dest, tmpDest, shift
+  //   srw SrwDest, tmpDest, shift
+  //   rlwinm dest, SrwDest, 0, 24 [16], 31
   if (ptrA != ZeroReg) {
     Ptr1Reg = RegInfo.createVirtualRegister(RC);
     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
@@ -11354,7 +11356,14 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
   //  exitMBB:
   //   ...
   BB = exitMBB;
-  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
+  // Since the shift amount is not a constant, we need to clear
+  // the upper bits with a separate RLWINM.
+  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
+      .addReg(SrwDestReg)
+      .addImm(0)
+      .addImm(is8bit ? 24 : 16)
+      .addImm(31);
+  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
       .addReg(TmpDestReg)
       .addReg(ShiftReg);
   return BB;

diff  --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index 008cd4c7157c1..1cb6708380369 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -360,6 +360,7 @@ define i8 @add_i8_monotonic(i8* %mem, i8 %operand) {
 ; PPC32-NEXT:    bne cr0, .LBB12_1
 ; PPC32-NEXT:  # %bb.2:
 ; PPC32-NEXT:    srw r3, r7, r3
+; PPC32-NEXT:    clrlwi r3, r3, 24
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: add_i8_monotonic:
@@ -380,6 +381,7 @@ define i8 @add_i8_monotonic(i8* %mem, i8 %operand) {
 ; PPC64-NEXT:    bne cr0, .LBB12_1
 ; PPC64-NEXT:  # %bb.2:
 ; PPC64-NEXT:    srw r3, r7, r3
+; PPC64-NEXT:    clrlwi r3, r3, 24
 ; PPC64-NEXT:    blr
   %val = atomicrmw add i8* %mem, i8 %operand monotonic
   ret i8 %val
@@ -405,6 +407,7 @@ define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) {
 ; PPC32-NEXT:    bne cr0, .LBB13_1
 ; PPC32-NEXT:  # %bb.2:
 ; PPC32-NEXT:    srw r3, r7, r3
+; PPC32-NEXT:    clrlwi r3, r3, 16
 ; PPC32-NEXT:    lwsync
 ; PPC32-NEXT:    blr
 ;
@@ -428,6 +431,7 @@ define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) {
 ; PPC64-NEXT:    bne cr0, .LBB13_1
 ; PPC64-NEXT:  # %bb.2:
 ; PPC64-NEXT:    srw r3, r7, r3
+; PPC64-NEXT:    clrlwi r3, r3, 16
 ; PPC64-NEXT:    lwsync
 ; PPC64-NEXT:    blr
   %val = atomicrmw xor i16* %mem, i16 %operand seq_cst

diff  --git a/llvm/test/CodeGen/PowerPC/ppc-partword-atomic.ll b/llvm/test/CodeGen/PowerPC/ppc-partword-atomic.ll
new file mode 100644
index 0000000000000..13564d6865e67
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc-partword-atomic.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr7 %s -o - | FileCheck %s --check-prefix=PWR7
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr9 %s -o - | FileCheck %s --check-prefix=PWR9
+
+@value8 = dso_local global { i8 } zeroinitializer, align 1
+@value16 = dso_local global { i16 } zeroinitializer, align 2
+@global_int = dso_local local_unnamed_addr global i32 0, align 4
+
+define dso_local zeroext i32 @testI8(i8 zeroext %val) local_unnamed_addr #0 {
+; PWR7-LABEL: testI8:
+; PWR7:       # %bb.0: # %entry
+; PWR7-NEXT:    addis 4, 2, value8@toc@ha
+; PWR7-NEXT:    li 6, 255
+; PWR7-NEXT:    sync
+; PWR7-NEXT:    addi 5, 4, value8@toc@l
+; PWR7-NEXT:    rlwinm 4, 5, 3, 27, 28
+; PWR7-NEXT:    rldicr 5, 5, 0, 61
+; PWR7-NEXT:    xori 4, 4, 24
+; PWR7-NEXT:    slw 7, 3, 4
+; PWR7-NEXT:    slw 3, 6, 4
+; PWR7-NEXT:    and 6, 7, 3
+; PWR7-NEXT:  .LBB0_1: # %entry
+; PWR7-NEXT:    #
+; PWR7-NEXT:    lwarx 7, 0, 5
+; PWR7-NEXT:    andc 8, 7, 3
+; PWR7-NEXT:    or 8, 6, 8
+; PWR7-NEXT:    stwcx. 8, 0, 5
+; PWR7-NEXT:    bne 0, .LBB0_1
+; PWR7-NEXT:  # %bb.2: # %entry
+; PWR7-NEXT:    srw 3, 7, 4
+; PWR7-NEXT:    addis 5, 2, global_int@toc@ha
+; PWR7-NEXT:    lwsync
+; PWR7-NEXT:    clrlwi 4, 3, 24
+; PWR7-NEXT:    li 3, 55
+; PWR7-NEXT:    stw 4, global_int@toc@l(5)
+; PWR7-NEXT:    blr
+;
+; PWR9-LABEL: testI8:
+; PWR9:       # %bb.0: # %entry
+; PWR9-NEXT:    addis 4, 2, value8@toc@ha
+; PWR9-NEXT:    sync
+; PWR9-NEXT:    addi 5, 4, value8@toc@l
+; PWR9-NEXT:  .LBB0_1: # %entry
+; PWR9-NEXT:    #
+; PWR9-NEXT:    lbarx 4, 0, 5
+; PWR9-NEXT:    stbcx. 3, 0, 5
+; PWR9-NEXT:    bne 0, .LBB0_1
+; PWR9-NEXT:  # %bb.2: # %entry
+; PWR9-NEXT:    clrlwi 3, 4, 24
+; PWR9-NEXT:    addis 4, 2, global_int@toc@ha
+; PWR9-NEXT:    lwsync
+; PWR9-NEXT:    stw 3, global_int@toc@l(4)
+; PWR9-NEXT:    li 3, 55
+; PWR9-NEXT:    blr
+entry:
+  %0 = atomicrmw xchg i8* getelementptr inbounds ({ i8 }, { i8 }* @value8, i64 0, i32 0), i8 %val seq_cst, align 1
+  %conv = zext i8 %0 to i32
+  store i32 %conv, i32* @global_int, align 4
+  ret i32 55
+}
+
+define dso_local zeroext i32 @testI16(i16 zeroext %val) local_unnamed_addr #0 {
+; PWR7-LABEL: testI16:
+; PWR7:       # %bb.0: # %entry
+; PWR7-NEXT:    addis 4, 2, value16@toc@ha
+; PWR7-NEXT:    li 6, 0
+; PWR7-NEXT:    sync
+; PWR7-NEXT:    addi 5, 4, value16@toc@l
+; PWR7-NEXT:    ori 6, 6, 65535
+; PWR7-NEXT:    rlwinm 4, 5, 3, 27, 27
+; PWR7-NEXT:    rldicr 5, 5, 0, 61
+; PWR7-NEXT:    xori 4, 4, 16
+; PWR7-NEXT:    slw 7, 3, 4
+; PWR7-NEXT:    slw 3, 6, 4
+; PWR7-NEXT:    and 6, 7, 3
+; PWR7-NEXT:  .LBB1_1: # %entry
+; PWR7-NEXT:    #
+; PWR7-NEXT:    lwarx 7, 0, 5
+; PWR7-NEXT:    andc 8, 7, 3
+; PWR7-NEXT:    or 8, 6, 8
+; PWR7-NEXT:    stwcx. 8, 0, 5
+; PWR7-NEXT:    bne 0, .LBB1_1
+; PWR7-NEXT:  # %bb.2: # %entry
+; PWR7-NEXT:    srw 3, 7, 4
+; PWR7-NEXT:    addis 5, 2, global_int@toc@ha
+; PWR7-NEXT:    lwsync
+; PWR7-NEXT:    clrlwi 4, 3, 16
+; PWR7-NEXT:    li 3, 55
+; PWR7-NEXT:    stw 4, global_int@toc@l(5)
+; PWR7-NEXT:    blr
+;
+; PWR9-LABEL: testI16:
+; PWR9:       # %bb.0: # %entry
+; PWR9-NEXT:    addis 4, 2, value16@toc@ha
+; PWR9-NEXT:    sync
+; PWR9-NEXT:    addi 5, 4, value16@toc@l
+; PWR9-NEXT:  .LBB1_1: # %entry
+; PWR9-NEXT:    #
+; PWR9-NEXT:    lharx 4, 0, 5
+; PWR9-NEXT:    sthcx. 3, 0, 5
+; PWR9-NEXT:    bne 0, .LBB1_1
+; PWR9-NEXT:  # %bb.2: # %entry
+; PWR9-NEXT:    clrlwi 3, 4, 16
+; PWR9-NEXT:    addis 4, 2, global_int@toc@ha
+; PWR9-NEXT:    lwsync
+; PWR9-NEXT:    stw 3, global_int@toc@l(4)
+; PWR9-NEXT:    li 3, 55
+; PWR9-NEXT:    blr
+entry:
+  %0 = atomicrmw xchg i16* getelementptr inbounds ({ i16 }, { i16 }* @value16, i64 0, i32 0), i16 %val seq_cst, align 2
+  %conv = zext i16 %0 to i32
+  store i32 %conv, i32* @global_int, align 4
+  ret i32 55
+}
+
+attributes #0 = { nounwind }


        


More information about the llvm-commits mailing list