[llvm] 45ad207 - [PowerPC] Add fix to partword atomic operations
Stefan Pintilie via llvm-commits
llvm-commits at lists.llvm.org
Thu May 20 10:36:44 PDT 2021
Author: Stefan Pintilie
Date: 2021-05-20T12:36:37-05:00
New Revision: 45ad207e4585ff18ed11509991bf8512f2466818
URL: https://github.com/llvm/llvm-project/commit/45ad207e4585ff18ed11509991bf8512f2466818
DIFF: https://github.com/llvm/llvm-project/commit/45ad207e4585ff18ed11509991bf8512f2466818.diff
LOG: [PowerPC] Add fix to partword atomic operations
Partword atomic binaries are not zero extended as they should be.
This patch fixes them to ensure that they are zero extended.
Reviewed By: nemanjai, #powerpc
Differential Revision: https://reviews.llvm.org/D102819
Added:
llvm/test/CodeGen/PowerPC/ppc-partword-atomic.ll
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/atomics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e44c5f8754761..2e681a4fed30c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11225,6 +11225,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
+ Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
Register Ptr1Reg;
Register TmpReg =
(!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
@@ -11252,7 +11253,8 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
// stwcx. tmp4, ptr
// bne- loopMBB
// fallthrough --> exitMBB
- // srw dest, tmpDest, shift
+ // srw SrwDest, tmpDest, shift
+ // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
@@ -11354,7 +11356,14 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
// exitMBB:
// ...
BB = exitMBB;
- BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
+ // Since the shift amount is not a constant, we need to clear
+ // the upper bits with a separate RLWINM.
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
+ .addReg(SrwDestReg)
+ .addImm(0)
+ .addImm(is8bit ? 24 : 16)
+ .addImm(31);
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
.addReg(TmpDestReg)
.addReg(ShiftReg);
return BB;
diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index 008cd4c7157c1..1cb6708380369 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -360,6 +360,7 @@ define i8 @add_i8_monotonic(i8* %mem, i8 %operand) {
; PPC32-NEXT: bne cr0, .LBB12_1
; PPC32-NEXT: # %bb.2:
; PPC32-NEXT: srw r3, r7, r3
+; PPC32-NEXT: clrlwi r3, r3, 24
; PPC32-NEXT: blr
;
; PPC64-LABEL: add_i8_monotonic:
@@ -380,6 +381,7 @@ define i8 @add_i8_monotonic(i8* %mem, i8 %operand) {
; PPC64-NEXT: bne cr0, .LBB12_1
; PPC64-NEXT: # %bb.2:
; PPC64-NEXT: srw r3, r7, r3
+; PPC64-NEXT: clrlwi r3, r3, 24
; PPC64-NEXT: blr
%val = atomicrmw add i8* %mem, i8 %operand monotonic
ret i8 %val
@@ -405,6 +407,7 @@ define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) {
; PPC32-NEXT: bne cr0, .LBB13_1
; PPC32-NEXT: # %bb.2:
; PPC32-NEXT: srw r3, r7, r3
+; PPC32-NEXT: clrlwi r3, r3, 16
; PPC32-NEXT: lwsync
; PPC32-NEXT: blr
;
@@ -428,6 +431,7 @@ define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) {
; PPC64-NEXT: bne cr0, .LBB13_1
; PPC64-NEXT: # %bb.2:
; PPC64-NEXT: srw r3, r7, r3
+; PPC64-NEXT: clrlwi r3, r3, 16
; PPC64-NEXT: lwsync
; PPC64-NEXT: blr
%val = atomicrmw xor i16* %mem, i16 %operand seq_cst
diff --git a/llvm/test/CodeGen/PowerPC/ppc-partword-atomic.ll b/llvm/test/CodeGen/PowerPC/ppc-partword-atomic.ll
new file mode 100644
index 0000000000000..13564d6865e67
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc-partword-atomic.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr7 %s -o - | FileCheck %s --check-prefix=PWR7
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr9 %s -o - | FileCheck %s --check-prefix=PWR9
+
+@value8 = dso_local global { i8 } zeroinitializer, align 1
+@value16 = dso_local global { i16 } zeroinitializer, align 2
+@global_int = dso_local local_unnamed_addr global i32 0, align 4
+
+define dso_local zeroext i32 @testI8(i8 zeroext %val) local_unnamed_addr #0 {
+; PWR7-LABEL: testI8:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: addis 4, 2, value8@toc@ha
+; PWR7-NEXT: li 6, 255
+; PWR7-NEXT: sync
+; PWR7-NEXT: addi 5, 4, value8@toc@l
+; PWR7-NEXT: rlwinm 4, 5, 3, 27, 28
+; PWR7-NEXT: rldicr 5, 5, 0, 61
+; PWR7-NEXT: xori 4, 4, 24
+; PWR7-NEXT: slw 7, 3, 4
+; PWR7-NEXT: slw 3, 6, 4
+; PWR7-NEXT: and 6, 7, 3
+; PWR7-NEXT: .LBB0_1: # %entry
+; PWR7-NEXT: #
+; PWR7-NEXT: lwarx 7, 0, 5
+; PWR7-NEXT: andc 8, 7, 3
+; PWR7-NEXT: or 8, 6, 8
+; PWR7-NEXT: stwcx. 8, 0, 5
+; PWR7-NEXT: bne 0, .LBB0_1
+; PWR7-NEXT: # %bb.2: # %entry
+; PWR7-NEXT: srw 3, 7, 4
+; PWR7-NEXT: addis 5, 2, global_int@toc@ha
+; PWR7-NEXT: lwsync
+; PWR7-NEXT: clrlwi 4, 3, 24
+; PWR7-NEXT: li 3, 55
+; PWR7-NEXT: stw 4, global_int@toc@l(5)
+; PWR7-NEXT: blr
+;
+; PWR9-LABEL: testI8:
+; PWR9: # %bb.0: # %entry
+; PWR9-NEXT: addis 4, 2, value8@toc@ha
+; PWR9-NEXT: sync
+; PWR9-NEXT: addi 5, 4, value8@toc@l
+; PWR9-NEXT: .LBB0_1: # %entry
+; PWR9-NEXT: #
+; PWR9-NEXT: lbarx 4, 0, 5
+; PWR9-NEXT: stbcx. 3, 0, 5
+; PWR9-NEXT: bne 0, .LBB0_1
+; PWR9-NEXT: # %bb.2: # %entry
+; PWR9-NEXT: clrlwi 3, 4, 24
+; PWR9-NEXT: addis 4, 2, global_int@toc@ha
+; PWR9-NEXT: lwsync
+; PWR9-NEXT: stw 3, global_int@toc@l(4)
+; PWR9-NEXT: li 3, 55
+; PWR9-NEXT: blr
+entry:
+ %0 = atomicrmw xchg i8* getelementptr inbounds ({ i8 }, { i8 }* @value8, i64 0, i32 0), i8 %val seq_cst, align 1
+ %conv = zext i8 %0 to i32
+ store i32 %conv, i32* @global_int, align 4
+ ret i32 55
+}
+
+define dso_local zeroext i32 @testI16(i16 zeroext %val) local_unnamed_addr #0 {
+; PWR7-LABEL: testI16:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: addis 4, 2, value16@toc@ha
+; PWR7-NEXT: li 6, 0
+; PWR7-NEXT: sync
+; PWR7-NEXT: addi 5, 4, value16@toc@l
+; PWR7-NEXT: ori 6, 6, 65535
+; PWR7-NEXT: rlwinm 4, 5, 3, 27, 27
+; PWR7-NEXT: rldicr 5, 5, 0, 61
+; PWR7-NEXT: xori 4, 4, 16
+; PWR7-NEXT: slw 7, 3, 4
+; PWR7-NEXT: slw 3, 6, 4
+; PWR7-NEXT: and 6, 7, 3
+; PWR7-NEXT: .LBB1_1: # %entry
+; PWR7-NEXT: #
+; PWR7-NEXT: lwarx 7, 0, 5
+; PWR7-NEXT: andc 8, 7, 3
+; PWR7-NEXT: or 8, 6, 8
+; PWR7-NEXT: stwcx. 8, 0, 5
+; PWR7-NEXT: bne 0, .LBB1_1
+; PWR7-NEXT: # %bb.2: # %entry
+; PWR7-NEXT: srw 3, 7, 4
+; PWR7-NEXT: addis 5, 2, global_int@toc@ha
+; PWR7-NEXT: lwsync
+; PWR7-NEXT: clrlwi 4, 3, 16
+; PWR7-NEXT: li 3, 55
+; PWR7-NEXT: stw 4, global_int@toc@l(5)
+; PWR7-NEXT: blr
+;
+; PWR9-LABEL: testI16:
+; PWR9: # %bb.0: # %entry
+; PWR9-NEXT: addis 4, 2, value16@toc@ha
+; PWR9-NEXT: sync
+; PWR9-NEXT: addi 5, 4, value16@toc@l
+; PWR9-NEXT: .LBB1_1: # %entry
+; PWR9-NEXT: #
+; PWR9-NEXT: lharx 4, 0, 5
+; PWR9-NEXT: sthcx. 3, 0, 5
+; PWR9-NEXT: bne 0, .LBB1_1
+; PWR9-NEXT: # %bb.2: # %entry
+; PWR9-NEXT: clrlwi 3, 4, 16
+; PWR9-NEXT: addis 4, 2, global_int@toc@ha
+; PWR9-NEXT: lwsync
+; PWR9-NEXT: stw 3, global_int@toc@l(4)
+; PWR9-NEXT: li 3, 55
+; PWR9-NEXT: blr
+entry:
+ %0 = atomicrmw xchg i16* getelementptr inbounds ({ i16 }, { i16 }* @value16, i64 0, i32 0), i16 %val seq_cst, align 2
+ %conv = zext i16 %0 to i32
+ store i32 %conv, i32* @global_int, align 4
+ ret i32 55
+}
+
+attributes #0 = { nounwind }
More information about the llvm-commits
mailing list