[llvm] RegisterCoalescer: Fix implicit operand handling during rematerialize (PR #75271)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 12 19:34:48 PST 2023
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/75271
From 3ab4746102a0f513f80fbe20b688a769de301d99 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 8 Dec 2023 10:34:49 +0700
Subject: [PATCH 1/5] RegisterCoalescer: Add undef flags in
removePartialRedundancy
If the copy being hoisted was an undef copy, we have the same problem
that eliminateUndefCopy needs to solve: we would effectively be
introducing a new live-out IMPLICIT_DEF. We need to add undef flags to
the remaining uses to avoid artificially introducing a live-through
undef value. Previously, the verifier would fail because the dead def
inside the loop appeared to provide the live-in value for the %1 use.
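
The handling added below boils down to the following (a minimal
annotated sketch of the new RegisterCoalescer.cpp hunk, using the
LiveIntervals/MachineRegisterInfo helpers already available in
removePartialRedundancy):

  // After pruning IntB at the hoisted undef copy, any surviving use of
  // IntB.reg() that the live range no longer covers would otherwise appear
  // to read a live-through value that does not exist; mark such reads undef.
  if (CopyMI.getOperand(1).isUndef()) {
    for (MachineOperand &MO : MRI->use_nodbg_operands(IntB.reg())) {
      SlotIndex UseIdx = LIS->getInstructionIndex(*MO.getParent());
      if (!IntB.liveAt(UseIdx))
        MO.setIsUndef(true);
    }
  }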
---
llvm/lib/CodeGen/RegisterCoalescer.cpp | 13 +++++
...-redundancy-clear-dead-flag-undef-copy.mir | 47 +++++++++++++++++++
2 files changed, 60 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/coalescer-partial-redundancy-clear-dead-flag-undef-copy.mir
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index c067d87a9fd81..1b6b94c27fc61 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1214,6 +1214,19 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
&EndPoints);
BValNo->markUnused();
+
+ if (CopyMI.getOperand(1).isUndef()) {
+ // We're introducing an undef phi def, and need to set undef on any users of
+ // the previously local def to avoid artificially extending the lifetime
+ // through the block.
+ for (MachineOperand &MO : MRI->use_nodbg_operands(IntB.reg())) {
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex UseIdx = LIS->getInstructionIndex(MI);
+ if (!IntB.liveAt(UseIdx))
+ MO.setIsUndef(true);
+ }
+ }
+
// Extend IntB to the EndPoints of its original live interval.
LIS->extendToIndices(IntB, EndPoints);
diff --git a/llvm/test/CodeGen/X86/coalescer-partial-redundancy-clear-dead-flag-undef-copy.mir b/llvm/test/CodeGen/X86/coalescer-partial-redundancy-clear-dead-flag-undef-copy.mir
new file mode 100644
index 0000000000000..5f33be0bc1555
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescer-partial-redundancy-clear-dead-flag-undef-copy.mir
@@ -0,0 +1,47 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=x86_64-pc-linux-gnu -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s
+
+# Check for "Live range continues after dead def flag".
+
+# There are 2 copies of undef, but the registers also appear to be
+# live due to block live-outs, and thus were not deleted:
+# eliminateUndefCopy only considered the live range, not the undef
+# flag.
+#
+# removePartialRedundancy would move the COPY undef %0 in bb.1 to
+# bb.0. The live range of %1 would then be extended to be live out of
+# %bb.1 for the backedge phi. This would then fail the verifier,
+# since the dead flag was no longer valid. This is fixed by checking
+# the undef flag on the hoisted copy and adding undef flags to the
+# affected uses.
+
+---
+name: partial_redundancy_coalesce_undef_copy_live_out
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: partial_redundancy_coalesce_undef_copy_live_out
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $rdi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $rdi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead [[XOR32ri:%[0-9]+]]:gr32 = XOR32ri undef [[XOR32ri]], 1, implicit-def dead $eflags
+ ; CHECK-NEXT: dead [[MOV32rr:%[0-9]+]]:gr32 = MOV32rr [[COPY]]
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = IMPLICIT_DEF
+ ; CHECK-NEXT: JMP_1 %bb.1
+ bb.0:
+ liveins: $rdi
+
+ %0:gr32 = COPY $rdi
+
+ bb.1:
+ %1:gr32 = COPY undef %0
+ dead %1:gr32 = XOR32ri %1, 1, implicit-def dead $eflags
+ dead %2:gr32 = MOV32rr killed %0
+ %0:gr32 = COPY killed undef %1
+ JMP_1 %bb.1
+
+...
From 0e5e2f87c60920067012faf2ca006940c2846b2c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 12 Dec 2023 16:22:09 +0700
Subject: [PATCH 2/5] Don't use deleted instr
---
llvm/lib/CodeGen/RegisterCoalescer.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 1b6b94c27fc61..2ef91e78f3370 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1201,6 +1201,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
<< printMBBReference(MBB) << '\t' << CopyMI);
}
+ const bool IsUndefCopy = CopyMI.getOperand(1).isUndef();
+
// Remove CopyMI.
// Note: This is fine to remove the copy before updating the live-ranges.
// While updating the live-ranges, we only look at slot indices and
@@ -1215,7 +1217,7 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
&EndPoints);
BValNo->markUnused();
- if (CopyMI.getOperand(1).isUndef()) {
+ if (IsUndefCopy) {
// We're introducing an undef phi def, and need to set undef on any users of
// the previously local def to avoid artificially extending the lifetime
// through the block.
From 34f0e2e8452754b20e19795c3b0f34715768657c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 12 Dec 2023 16:07:46 +0700
Subject: [PATCH 3/5] RegisterCoalescer: Fix implicit operand handling during
rematerialize
If the rematerialization placed a subregister def into a super-register,
and implicit operands referenced the original register, we need to add
undef flags to the now subregister-indexed implicit operands, just as we
do for the main result operand.
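
The one-line change works because MachineInstr::setRegisterDefReadUndef
marks every def of the given register that carries a subregister index,
not just operand 0, so the implicit-def operands are covered as well.
Roughly, with Reg being NewMI.getOperand(0).getReg() as in the hunk
below (a paraphrase of the helper for illustration only; see
MachineInstr.cpp for the real implementation):

  // Approximate effect of NewMI.setRegisterDefReadUndef(Reg): every
  // subregister def of Reg on this instruction, implicit or explicit,
  // becomes a read-undef def.
  for (MachineOperand &MO : NewMI.operands()) {
    if (MO.isReg() && MO.isDef() && MO.getReg() == Reg && MO.getSubReg())
      MO.setIsUndef(true);
  }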
---
llvm/lib/CodeGen/RegisterCoalescer.cpp | 3 +-
...-remat-with-undef-implicit-def-operand.mir | 123 ++++++++++++++++++
2 files changed, 124 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/coalescer-remat-with-undef-implicit-def-operand.mir
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 2ef91e78f3370..c1af37c8510ff 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1611,8 +1611,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}
- if (NewMI.getOperand(0).getSubReg())
- NewMI.getOperand(0).setIsUndef();
+ NewMI.setRegisterDefReadUndef(NewMI.getOperand(0).getReg());
// Transfer over implicit operands to the rematerialized instruction.
for (MachineOperand &MO : ImplicitOps)
diff --git a/llvm/test/CodeGen/X86/coalescer-remat-with-undef-implicit-def-operand.mir b/llvm/test/CodeGen/X86/coalescer-remat-with-undef-implicit-def-operand.mir
new file mode 100644
index 0000000000000..42d17130412b6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescer-remat-with-undef-implicit-def-operand.mir
@@ -0,0 +1,123 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=x86_64-pc-linux-gnu -verify-coalescing -run-pass=register-coalescer -o - %s | FileCheck %s
+
+# The %1 = MOV32r0 is rematerialized as a subregister of %2. The
+# implicit-def %1 operand needs to have an undef flag added, just like
+# the main result operand.
+
+---
+name: remat_into_subregister_set_undef_implicit_operand_subregisters
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: remat_into_subregister_set_undef_implicit_operand_subregisters
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $rdi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_1]]
+ ; CHECK-NEXT: undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_2]].sub_32bit, implicit-def [[MOV32r0_2]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = XOR32ri [[MOV32r0_2]].sub_32bit, 1, implicit-def dead $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: JCC_1 %bb.4, 5, implicit killed undef $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead [[MOV32rr:%[0-9]+]]:gr32 = MOV32rr [[MOV32r0_1]]
+ ; CHECK-NEXT: dead [[SHL64ri:%[0-9]+]]:gr64_nosp = SHL64ri [[MOV32r0_]], 4, implicit-def dead $eflags
+ ; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = COPY [[MOV32r0_2]].sub_32bit
+ ; CHECK-NEXT: JMP_1 %bb.1
+ bb.0:
+ liveins: $rdi
+
+ undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
+ %1:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def %1
+ undef %2.sub_32bit:gr64_with_sub_8bit = COPY %1, implicit-def %2
+
+ bb.1:
+ %2.sub_32bit:gr64_with_sub_8bit = XOR32ri %2.sub_32bit, 1, implicit-def dead $eflags
+
+ bb.2:
+ JCC_1 %bb.4, 5, implicit killed undef $eflags
+
+ bb.3:
+
+ bb.4:
+ dead %3:gr32 = MOV32rr %1
+ dead %4:gr64_nosp = SHL64ri %0, 4, implicit-def dead $eflags
+ %1:gr32 = COPY %2.sub_32bit
+ JMP_1 %bb.1
+
+...
+
+# Same, except the implicit-def on the original instruction already
+# has a subregister index.
+
+---
+name: remat_into_subregister_set_undef_implicit_operand_subregisters_with_subreg
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: remat_into_subregister_set_undef_implicit_operand_subregisters_with_subreg
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $rdi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+ ; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_1]].sub_8bit
+ ; CHECK-NEXT: undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_2]].sub_8bit, implicit-def [[MOV32r0_2]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = XOR32ri [[MOV32r0_2]].sub_32bit, 1, implicit-def dead $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: JCC_1 %bb.4, 5, implicit killed undef $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead [[MOV32rr:%[0-9]+]]:gr32 = MOV32rr [[MOV32r0_1]]
+ ; CHECK-NEXT: dead [[SHL64ri:%[0-9]+]]:gr64_nosp = SHL64ri [[MOV32r0_]], 4, implicit-def dead $eflags
+ ; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = COPY [[MOV32r0_2]].sub_32bit
+ ; CHECK-NEXT: JMP_1 %bb.1
+ bb.0:
+ liveins: $rdi
+
+ undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
+ %1:gr32 = MOV32r0 implicit-def dead $eflags, undef implicit-def %1.sub_8bit
+ undef %2.sub_32bit:gr64_with_sub_8bit = COPY %1, implicit-def %2
+
+ bb.1:
+ %2.sub_32bit:gr64_with_sub_8bit = XOR32ri %2.sub_32bit, 1, implicit-def dead $eflags
+
+ bb.2:
+ JCC_1 %bb.4, 5, implicit killed undef $eflags
+
+ bb.3:
+
+ bb.4:
+ dead %3:gr32 = MOV32rr %1
+ dead %4:gr64_nosp = SHL64ri %0, 4, implicit-def dead $eflags
+ %1:gr32 = COPY %2.sub_32bit
+ JMP_1 %bb.1
+
+...
From 6d4120f840508bdf26d1ed867978afcd9d6134d4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 8 Dec 2023 10:15:59 +0700
Subject: [PATCH 4/5] Add IR test
---
.../X86/coalescer-dead-flag-verifier-error.ll | 65 +++++++++++++++++++
1 file changed, 65 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/coalescer-dead-flag-verifier-error.ll
diff --git a/llvm/test/CodeGen/X86/coalescer-dead-flag-verifier-error.ll b/llvm/test/CodeGen/X86/coalescer-dead-flag-verifier-error.ll
new file mode 100644
index 0000000000000..1036f9a53178d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescer-dead-flag-verifier-error.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -verify-coalescing < %s | FileCheck %s
+
+%"class.llvm::APInt." = type <{ %union.anon., i32, [4 x i8] }>
+%union.anon. = type { i64 }
+
+define void @_ZNK4llvm5APInt21multiplicativeInverseERKS0_(ptr %r) {
+; CHECK-LABEL: _ZNK4llvm5APInt21multiplicativeInverseERKS0_:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: jmp .LBB0_1
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_4: # %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: movl %edx, %edx
+; CHECK-NEXT: shlq $4, %rdx
+; CHECK-NEXT: movl $0, (%rdi,%rdx)
+; CHECK-NEXT: movl %ecx, %edx
+; CHECK-NEXT: .LBB0_1: # %bb
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB0_3 Depth 2
+; CHECK-NEXT: xorl $1, %ecx
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: movq %rcx, %r8
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB0_4
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_3: # %for.body.i.i.i.i.i.3
+; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: orq $1, %r8
+; CHECK-NEXT: orq $1, %rsi
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: jmp .LBB0_4
+entry:
+ br label %bb
+
+bb: ; preds = %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %xor, %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i ]
+ %xor = xor i32 %i.0, 1
+ %idxprom = zext nneg i32 %xor to i64
+ br label %for.body.i.i.i.i.i
+
+for.body.i.i.i.i.i: ; preds = %for.body.i.i.i.i.i.3, %bb
+ %lsr.iv37 = phi i64 [ %lsr.iv.next38, %for.body.i.i.i.i.i.3 ], [ 0, %bb ]
+ %lsr.iv = phi i64 [ %lsr.iv.next, %for.body.i.i.i.i.i.3 ], [ %idxprom, %bb ]
+ %exitcond.not.i.i.i.i.i.2 = icmp eq i64 0, 1
+ br i1 %exitcond.not.i.i.i.i.i.2, label %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, label %for.body.i.i.i.i.i.3
+
+for.body.i.i.i.i.i.3: ; preds = %for.body.i.i.i.i.i
+ %sunkaddr55 = mul i64 %lsr.iv37, 0
+ %i = xor i64 %lsr.iv, 0
+ %lsr.iv.next = or i64 %lsr.iv, 1
+ %lsr.iv.next38 = or i64 %lsr.iv37, 1
+ br label %for.body.i.i.i.i.i
+
+_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i: ; preds = %for.body.i.i.i.i.i
+ %idxprom12 = zext nneg i32 %i.0 to i64
+ %arrayidx13 = getelementptr [2 x %"class.llvm::APInt."], ptr %r, i64 0, i64 %idxprom12
+ store i32 0, ptr %arrayidx13, align 4
+ br label %bb
+}
From 9c39a34f7d89cbba0cbff1dadde0f29c952a61da Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 13 Dec 2023 10:33:37 +0700
Subject: [PATCH 5/5] Add more IR test
---
.../X86/coalescer-dead-flag-verifier-error.ll | 66 +++++++++++++++++++
1 file changed, 66 insertions(+)
diff --git a/llvm/test/CodeGen/X86/coalescer-dead-flag-verifier-error.ll b/llvm/test/CodeGen/X86/coalescer-dead-flag-verifier-error.ll
index 1036f9a53178d..4a396599a4444 100644
--- a/llvm/test/CodeGen/X86/coalescer-dead-flag-verifier-error.ll
+++ b/llvm/test/CodeGen/X86/coalescer-dead-flag-verifier-error.ll
@@ -63,3 +63,69 @@ _ZNK4llvm5APInt13getActiveBitsEv.exit.i.i: ; preds = %for.body.i.i.i.i.i
store i32 0, ptr %arrayidx13, align 4
br label %bb
}
+
+; This variant hit an assert and never reached the verifier error
+define void @_ZNK4llvm5APInt21multiplicativeInverseERKS0__assert(ptr %r) {
+; CHECK-LABEL: _ZNK4llvm5APInt21multiplicativeInverseERKS0__assert:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: jmp .LBB1_1
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB1_4: # %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i
+; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT: movl %edx, %edx
+; CHECK-NEXT: shlq $4, %rdx
+; CHECK-NEXT: movl $0, (%rdi,%rdx)
+; CHECK-NEXT: movl %ecx, %edx
+; CHECK-NEXT: .LBB1_1: # %bb
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB1_3 Depth 2
+; CHECK-NEXT: xorl $1, %ecx
+; CHECK-NEXT: movq %rcx, %rsi
+; CHECK-NEXT: shlq $4, %rsi
+; CHECK-NEXT: movq (%rsi), %rsi
+; CHECK-NEXT: xorl %r8d, %r8d
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB1_4
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB1_3: # %for.body.i.i.i.i.i.3
+; CHECK-NEXT: # Parent Loop BB1_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: orq $1, %rsi
+; CHECK-NEXT: orq $1, %r8
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB1_3
+; CHECK-NEXT: jmp .LBB1_4
+entry:
+ br label %bb
+
+bb: ; preds = %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %xor, %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i ]
+ %xor = xor i32 %i.0, 1
+ %idxprom = zext nneg i32 %xor to i64
+ %arrayidx = getelementptr [2 x %"class.llvm::APInt."], ptr null, i64 0, i64 %idxprom
+ %i.i.i.i.i.i = load ptr, ptr %arrayidx, align 16
+ %i.i.i.i.i.i36 = ptrtoint ptr %i.i.i.i.i.i to i64
+ br label %for.body.i.i.i.i.i
+
+for.body.i.i.i.i.i: ; preds = %for.body.i.i.i.i.i.3, %bb
+ %lsr.iv37 = phi i64 [ %lsr.iv.next38, %for.body.i.i.i.i.i.3 ], [ 0, %bb ]
+ %lsr.iv = phi i64 [ %lsr.iv.next, %for.body.i.i.i.i.i.3 ], [ %i.i.i.i.i.i36, %bb ]
+ %exitcond.not.i.i.i.i.i.2 = icmp eq i64 0, 1
+ br i1 %exitcond.not.i.i.i.i.i.2, label %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, label %for.body.i.i.i.i.i.3
+
+for.body.i.i.i.i.i.3: ; preds = %for.body.i.i.i.i.i
+ %sunkaddr55 = mul i64 %lsr.iv37, 0
+ %i = xor i64 %lsr.iv, 0
+ %lsr.iv.next = or i64 %lsr.iv, 1
+ %lsr.iv.next38 = or i64 %lsr.iv37, 1
+ br label %for.body.i.i.i.i.i
+
+_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i: ; preds = %for.body.i.i.i.i.i
+ %idxprom12 = zext nneg i32 %i.0 to i64
+ %arrayidx13 = getelementptr [2 x %"class.llvm::APInt."], ptr %r, i64 0, i64 %idxprom12
+ store i32 0, ptr %arrayidx13, align 4
+ br label %bb
+}