[llvm] 2e25204 - Make `shouldExpandLogicAtomicRMWInIR` able to match both operands.
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 21 00:58:55 PST 2023
Author: Noah Goldstein
Date: 2023-01-21T00:53:34-08:00
New Revision: 2e25204779e5b972d668bf66a0014c1325813b35
URL: https://github.com/llvm/llvm-project/commit/2e25204779e5b972d668bf66a0014c1325813b35
DIFF: https://github.com/llvm/llvm-project/commit/2e25204779e5b972d668bf66a0014c1325813b35.diff
LOG: Make `shouldExpandLogicAtomicRMWInIR` able to match both operands.
The previous logic was buggy: it asserted that I->getOperand(0) must be the
RMW instruction. This change fixes that so the RMW instruction can appear as
either operand 0 or operand 1.
Also update the tests to explicitly exercise the RMW result as operand 0 and
as operand 1 (no change to codegen).
Reviewed By: pengfei
Differential Revision: https://reviews.llvm.org/D142166
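For illustration, a minimal LLVM IR sketch of the pattern this change affects, mirroring the updated tests (the function name below is made up and not part of the patch): the `and` that consumes the atomicrmw result is now matched whether that result is operand 0 or operand 1.

define i32 @illustrative_rmw_as_operand_1(ptr %v, i32 %c) nounwind {
entry:
  %shl = shl nuw i32 1, %c
  %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4
  ; The RMW result %0 is operand 1 of the and; the old code asserted that it
  ; was operand 0.
  %and = and i32 %shl, %0
  ret i32 %and
}

Both this form and the operand-0 form (%and = and i32 %0, %shl) are now recognized by shouldExpandLogicAtomicRMWInIR.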
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll
llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 411462f8ff07b..f999e27c97582 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31520,12 +31520,16 @@ X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
AI->getParent() != I->getParent())
return AtomicExpansionKind::CmpXChg;
- assert(I->getOperand(0) == AI);
+ unsigned OtherIdx = I->getOperand(0) == AI ? 1 : 0;
+
+ // This is a redundant AND, it should get cleaned up elsewhere.
+ if (AI == I->getOperand(OtherIdx))
+ return AtomicExpansionKind::CmpXChg;
+
// The following instruction must be a AND single bit.
if (BitChange.second == ConstantBit || BitChange.second == NotConstantBit) {
- auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
- assert(C1 != nullptr);
- auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
+ auto *C1 = cast<ConstantInt>(AI->getValOperand());
+ auto *C2 = dyn_cast<ConstantInt>(I->getOperand(OtherIdx));
if (!C2 || !isPowerOf2_64(C2->getZExtValue())) {
return AtomicExpansionKind::CmpXChg;
}
@@ -31540,7 +31544,7 @@ X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
assert(BitChange.second == ShiftBit || BitChange.second == NotShiftBit);
- auto BitTested = FindSingleBitChange(I->getOperand(1));
+ auto BitTested = FindSingleBitChange(I->getOperand(OtherIdx));
if (BitTested.second != ShiftBit && BitTested.second != NotShiftBit)
return AtomicExpansionKind::CmpXChg;
@@ -31591,9 +31595,9 @@ void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
Value *Result = nullptr;
auto BitTested = FindSingleBitChange(AI->getValOperand());
assert(BitTested.first != nullptr);
+
if (BitTested.second == ConstantBit || BitTested.second == NotConstantBit) {
- auto *C = dyn_cast<ConstantInt>(I->getOperand(1));
- assert(C != nullptr);
+ auto *C = cast<ConstantInt>(I->getOperand(I->getOperand(0) == AI ? 1 : 0));
BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_C, AI->getType());
diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll
index f7ebf302adb6d..3dcf8c6c00ef1 100644
--- a/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll
+++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll
@@ -15,7 +15,7 @@ define i64 @atomic_shl1_xor_64_gpr_val(ptr %v, i64 %c) nounwind {
entry:
%shl = shl nuw i64 1, %c
%0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8
- %and = and i64 %0, %shl
+ %and = and i64 %shl, %0
ret i64 %and
}
@@ -109,7 +109,7 @@ entry:
%shl = shl nuw i64 1, %rem
%0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8
%shl1 = shl nuw i64 1, %c
- %and = and i64 %0, %shl1
+ %and = and i64 %shl1, %0
ret i64 %and
}
@@ -200,7 +200,7 @@ define i64 @atomic_shl1_xor_64_gpr_valz(ptr %v, i64 %c) nounwind {
entry:
%shl = shl nuw i64 1, %c
%0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8
- %1 = xor i64 %0, -1
+ %1 = sub i64 -1, %0
%2 = lshr i64 %1, %c
%conv = and i64 %2, 1
ret i64 %conv
@@ -223,14 +223,14 @@ define i64 @atomic_shl2_xor_64_gpr_valz(ptr %v, i64 %c) nounwind {
; CHECK-NEXT: jne .LBB9_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: testq %rdx, %rax
+; CHECK-NEXT: testq %rax, %rdx
; CHECK-NEXT: sete %cl
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: retq
entry:
%shl = shl i64 2, %c
%0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8
- %and = and i64 %0, %shl
+ %and = and i64 %shl, %0
%tobool.not = icmp eq i64 %and, 0
%conv = zext i1 %tobool.not to i64
ret i64 %conv
@@ -670,7 +670,7 @@ define i64 @atomic_shl1_and_64_gpr_val(ptr %v, i64 %c) nounwind {
; CHECK-NEXT: retq
entry:
%shl = shl nuw i64 1, %c
- %not = xor i64 %shl, -1
+ %not = sub i64 -1, %shl
%0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
%and = and i64 %0, %shl
ret i64 %and
@@ -769,7 +769,7 @@ define i64 @atomic_shl1_mask0_and_64_gpr_val(ptr %v, i64 %c) nounwind {
entry:
%rem = and i64 %c, 63
%shl = shl nuw i64 1, %rem
- %not = xor i64 %shl, -1
+ %not = sub i64 -1, %shl
%0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
%shl1 = shl nuw i64 1, %c
%and = and i64 %0, %shl1
@@ -834,14 +834,15 @@ define i64 @atomic_blsi_and_64_gpr_val(ptr %v, i64 %c) nounwind {
; CHECK-NEXT: lock cmpxchgq %rsi, (%rdi)
; CHECK-NEXT: jne .LBB31_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: andq %rcx, %rax
+; CHECK-NEXT: andq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: retq
entry:
%sub = sub i64 0, %c
%and = and i64 %sub, %c
%not = xor i64 %and, -1
%0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
- %and3 = and i64 %0, %and
+ %and3 = and i64 %and, %0
ret i64 %and3
}
@@ -893,7 +894,7 @@ define i64 @atomic_shl2_and_64_gpr_valnz(ptr %v, i64 %c) nounwind {
; CHECK-NEXT: jne .LBB33_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: testq %rdx, %rax
+; CHECK-NEXT: testq %rax, %rdx
; CHECK-NEXT: setne %cl
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: retq
@@ -901,7 +902,7 @@ entry:
%shl = shl i64 2, %c
%not = xor i64 %shl, -1
%0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
- %and = and i64 %0, %shl
+ %and = and i64 %shl, %0
%tobool = icmp ne i64 %and, 0
%conv = zext i1 %tobool to i64
ret i64 %conv
@@ -1113,7 +1114,7 @@ entry:
%shl = shl nuw i64 1, %c
%not = xor i64 %shl, -1
%0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
- %and = and i64 %0, %shl
+ %and = and i64 %shl, %0
%tobool.not = icmp eq i64 %and, 0
br i1 %tobool.not, label %return, label %if.then
@@ -1265,7 +1266,7 @@ entry:
%not = xor i64 %shl, -1
%0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
%shl1 = shl nuw i64 1, %c
- %and = and i64 %0, %shl1
+ %and = and i64 %shl1, %0
%tobool.not = icmp eq i64 %and, 0
br i1 %tobool.not, label %return, label %if.then
@@ -1470,7 +1471,7 @@ define i64 @atomic_shl1_xor_64_const_brz(ptr %v) nounwind {
; CHECK-NEXT: retq
entry:
%0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8
- %and = and i64 %0, 16
+ %and = and i64 16, %0
%tobool.not = icmp eq i64 %and, 0
br i1 %tobool.not, label %if.then, label %return
diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
index b0f4e08f292e4..5a5d207b33734 100644
--- a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
+++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
@@ -19,7 +19,8 @@ define zeroext i8 @atomic_shl1_or_8_gpr_val(ptr %v, i8 zeroext %c) nounwind {
; X86-NEXT: lock cmpxchgb %cl, (%esi)
; X86-NEXT: jne .LBB0_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andb %dl, %al
+; X86-NEXT: andb %al, %dl
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -38,14 +39,15 @@ define zeroext i8 @atomic_shl1_or_8_gpr_val(ptr %v, i8 zeroext %c) nounwind {
; X64-NEXT: lock cmpxchgb %cl, (%rdi)
; X64-NEXT: jne .LBB0_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andb %dl, %al
+; X64-NEXT: andb %al, %dl
+; X64-NEXT: movl %edx, %eax
; X64-NEXT: retq
entry:
%conv = zext i8 %c to i32
%shl = shl nuw i32 1, %conv
%conv1 = trunc i32 %shl to i8
%0 = atomicrmw or ptr %v, i8 %conv1 monotonic, align 1
- %conv5 = and i8 %0, %conv1
+ %conv5 = and i8 %conv1, %0
ret i8 %conv5
}
@@ -111,21 +113,24 @@ entry:
define zeroext i8 @atomic_shl1_mask01_or_8_gpr_val(ptr %v, i8 zeroext %c) nounwind {
; X86-LABEL: atomic_shl1_mask01_or_8_gpr_val:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $7, %cl
-; X86-NEXT: movb $1, %ah
-; X86-NEXT: shlb %cl, %ah
-; X86-NEXT: movb (%edx), %al
+; X86-NEXT: movb $1, %dl
+; X86-NEXT: shlb %cl, %dl
+; X86-NEXT: movzbl (%esi), %eax
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB2_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: orb %ah, %cl
-; X86-NEXT: lock cmpxchgb %cl, (%edx)
+; X86-NEXT: orb %dl, %cl
+; X86-NEXT: lock cmpxchgb %cl, (%esi)
; X86-NEXT: jne .LBB2_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: andb %ah, %al
+; X86-NEXT: andb %al, %dl
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: atomic_shl1_mask01_or_8_gpr_val:
@@ -144,13 +149,14 @@ define zeroext i8 @atomic_shl1_mask01_or_8_gpr_val(ptr %v, i8 zeroext %c) nounwi
; X64-NEXT: lock cmpxchgb %cl, (%rdi)
; X64-NEXT: jne .LBB2_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NEXT: andb %dl, %al
+; X64-NEXT: andb %al, %dl
+; X64-NEXT: movl %edx, %eax
; X64-NEXT: retq
entry:
%0 = and i8 %c, 7
%shl = shl nuw i8 1, %0
%1 = atomicrmw or ptr %v, i8 %shl monotonic, align 1
- %conv7 = and i8 %1, %shl
+ %conv7 = and i8 %shl, %1
ret i8 %conv7
}
@@ -777,7 +783,7 @@ entry:
%0 = and i16 %c, 7
%shl = shl nuw nsw i16 1, %0
%1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2
- %and = and i16 %1, %shl
+ %and = and i16 %shl, %1
ret i16 %and
}
@@ -2367,7 +2373,7 @@ entry:
%shl = shl nuw nsw i16 1, %0
%not = xor i16 %shl, -1
%1 = atomicrmw and ptr %v, i16 %not monotonic, align 2
- %and = and i16 %1, %shl
+ %and = and i16 %shl, %1
ret i16 %and
}
@@ -3552,7 +3558,7 @@ define zeroext i16 @atomic_shl1_or_16_const_val(ptr %v) nounwind {
; X64-NEXT: retq
entry:
%0 = atomicrmw or ptr %v, i16 16 monotonic, align 2
- %1 = and i16 %0, 16
+ %1 = and i16 16, %0
ret i16 %1
}
@@ -3776,7 +3782,7 @@ define i32 @atomic_shl1_or_32_gpr_val(ptr %v, i32 %c) nounwind {
entry:
%shl = shl nuw i32 1, %c
%0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4
- %and = and i32 %0, %shl
+ %and = and i32 %shl, %0
ret i32 %and
}
@@ -3871,7 +3877,7 @@ entry:
%0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4
%1 = and i32 %c, 31
%shl1 = shl nuw i32 1, %1
- %and = and i32 %0, %shl1
+ %and = and i32 %shl1, %0
ret i32 %and
}
@@ -4689,7 +4695,7 @@ entry:
%0 = and i32 %c, 15
%shl = shl nuw nsw i32 1, %0
%1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4
- %and = and i32 %1, %shl
+ %and = and i32 %shl, %1
%tobool.not = icmp eq i32 %and, 0
br i1 %tobool.not, label %return, label %if.then
@@ -4835,7 +4841,7 @@ entry:
%rem = and i32 %c, 31
%shl = shl nuw i32 1, %rem
%0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4
- %and = and i32 %0, %shl
+ %and = and i32 %shl, %0
%tobool.not = icmp eq i32 %and, 0
br i1 %tobool.not, label %return, label %if.then
@@ -5001,7 +5007,7 @@ entry:
%0 = and i32 %c, 15
%shl = shl nuw nsw i32 1, %0
%1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4
- %and = and i32 %1, %shl
+ %and = and i32 %shl, %1
%tobool.not = icmp eq i32 %and, 0
br i1 %tobool.not, label %if.then, label %return
@@ -5099,7 +5105,7 @@ entry:
%0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4
%rem = and i32 %c, 31
%shl1 = shl nuw i32 1, %rem
- %and = and i32 %0, %shl1
+ %and = and i32 %shl1, %0
%tobool.not = icmp eq i32 %and, 0
br i1 %tobool.not, label %if.then, label %return
@@ -5268,7 +5274,7 @@ define i32 @atomic_shl1_or_32_gpr_brnz(ptr %v, i32 %c) nounwind {
entry:
%shl = shl nuw i32 1, %c
%0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4
- %and = and i32 %0, %shl
+ %and = and i32 %shl, %0
%tobool.not = icmp eq i32 %and, 0
br i1 %tobool.not, label %return, label %if.then
@@ -5460,7 +5466,7 @@ entry:
%rem = and i32 %c, 31
%shl = shl nuw i32 1, %rem
%0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4
- %and = and i32 %0, %shl
+ %and = and i32 %shl, %0
%tobool.not = icmp eq i32 %and, 0
br i1 %tobool.not, label %return, label %if.then
@@ -5763,7 +5769,7 @@ entry:
%shl = shl nuw i32 1, %c
%0 = atomicrmw and ptr %v, i32 %shl monotonic, align 4
%1 = and i32 %c, 31
- %2 = xor i32 %0, -1
+ %2 = xor i32 -1, %0
%3 = lshr i32 %2, %1
%lnot.ext = and i32 %3, 1
ret i32 %lnot.ext
@@ -6011,7 +6017,7 @@ entry:
%shl = shl nuw i32 1, %rem
%0 = atomicrmw and ptr %v, i32 %shl monotonic, align 4
%shl1 = shl nuw i32 1, %c
- %and = and i32 %0, %shl1
+ %and = and i32 %shl1, %0
%tobool.not = icmp eq i32 %and, 0
br i1 %tobool.not, label %return, label %if.then
@@ -6179,7 +6185,7 @@ entry:
%sub = sub i32 0, %c
%and = and i32 %sub, %c
%0 = atomicrmw and ptr %v, i32 %and monotonic, align 4
- %and3 = and i32 %0, %and
+ %and3 = and i32 %and, %0
%tobool.not = icmp eq i32 %and3, 0
br i1 %tobool.not, label %return, label %if.then
@@ -6274,7 +6280,7 @@ entry:
%0 = and i32 %c, 15
%shl = shl nuw nsw i32 1, %0
%1 = atomicrmw and ptr %v, i32 %shl monotonic, align 4
- %and = and i32 %1, %shl
+ %and = and i32 %shl, %1
%tobool.not = icmp eq i32 %and, 0
br i1 %tobool.not, label %if.then, label %return
@@ -6551,7 +6557,7 @@ define i32 @atomic_shl1_xor_32_const_val(ptr %v) nounwind {
; X64-NEXT: retq
entry:
%0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4
- %and = and i32 %0, 16
+ %and = and i32 16, %0
ret i32 %and
}
@@ -6765,7 +6771,7 @@ define i32 @atomic_shl1_and_32_const_br(ptr %v) nounwind {
; X64-NEXT: retq
entry:
%0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4
- %and = and i32 %0, 16
+ %and = and i32 16, %0
%tobool.not = icmp eq i32 %and, 0
br i1 %tobool.not, label %return, label %if.then
@@ -6817,3 +6823,53 @@ return: ; preds = %entry, %if.then
%retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ]
ret i32 %retval.0
}
+
+; This IR isn't really ever expected. This test is just to make sure we don't crash.
+define i32 @atomic_xor_dead_and(ptr %v, i32 %c) nounwind {
+; X86-LABEL: atomic_xor_dead_and:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $7, %cl
+; X86-NEXT: movl $1, %esi
+; X86-NEXT: shll %cl, %esi
+; X86-NEXT: movl (%edx), %eax
+; X86-NEXT: .p2align 4, 0x90
+; X86-NEXT: .LBB122_1: # %atomicrmw.start
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: xorl %esi, %ecx
+; X86-NEXT: lock cmpxchgl %ecx, (%edx)
+; X86-NEXT: jne .LBB122_1
+; X86-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: atomic_xor_dead_and:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: andb $7, %cl
+; X64-NEXT: movl $1, %edx
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: shll %cl, %edx
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB122_1: # %atomicrmw.start
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: xorl %edx, %ecx
+; X64-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-NEXT: jne .LBB122_1
+; X64-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NEXT: andl %edx, %eax
+; X64-NEXT: retq
+entry:
+ %0 = and i32 %c, 7
+ %shl = shl nuw nsw i32 1, %0
+ %1 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4
+ %and = and i32 %1, %1
+ %and1 = and i32 %and, %shl
+ ret i32 %and1
+}