[llvm] [CodeGenPrepare] sinkCmpExpression - don't sink larger than legal integer comparisons (PR #166778)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 7 07:54:07 PST 2025
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/166778
>From 74a1f98e1a01ded01cbb3c46574a60f16f032700 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Thu, 6 Nov 2025 14:09:45 +0000
Subject: [PATCH] [CodeGenPrepare] sinkCmpExpression - don't sink larger than
legal integer comparisons
A generic alternative to #166564 - make the assumption that expanding integer comparisons will be expensive if they are larger than the largest legal type.
Thumb codegen seems to suffer more than most
Fixes #166534
---
llvm/lib/CodeGen/CodeGenPrepare.cpp | 11 ++-
llvm/test/CodeGen/ARM/consthoist-icmpimm.ll | 40 +++++----
llvm/test/CodeGen/RISCV/branch-on-zero.ll | 7 +-
.../test/CodeGen/RISCV/overflow-intrinsics.ll | 82 +++++++++----------
.../RISCV/rvv/vxrm-insert-out-of-loop.ll | 2 +-
.../X86/2012-01-10-UndefExceptionEdge.ll | 55 ++++++-------
llvm/test/CodeGen/X86/pr166534.ll | 68 ++++-----------
7 files changed, 116 insertions(+), 149 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 0309e225d9df4..9434de8f02e0a 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1839,7 +1839,8 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
/// lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
-static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
+static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
+ const DataLayout &DL) {
if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
return false;
@@ -1847,6 +1848,12 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
return false;
+ // Avoid sinking larger than legal integer comparisons.
+ if (Cmp->getOperand(0)->getType()->isIntegerTy() &&
+ Cmp->getOperand(0)->getType()->getScalarSizeInBits() >
+ DL.getLargestLegalIntTypeSizeInBits())
+ return false;
+
// Only insert a cmp in each block once.
DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
@@ -2224,7 +2231,7 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
}
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
- if (sinkCmpExpression(Cmp, *TLI))
+ if (sinkCmpExpression(Cmp, *TLI, *DL))
return true;
if (combineToUAddWithOverflow(Cmp, ModifiedDT))
diff --git a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
index 16b7403bdb932..e64707769b809 100644
--- a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
+++ b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
@@ -39,46 +39,50 @@ define i32 @icmp64_sge_0(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
;
; CHECKV7M-LABEL: icmp64_sge_0:
; CHECKV7M: @ %bb.0:
+; CHECKV7M-NEXT: mvns r0, r1
+; CHECKV7M-NEXT: lsrs r2, r0, #31
; CHECKV7M-NEXT: ldr r0, [sp, #8]
; CHECKV7M-NEXT: lsls r0, r0, #31
-; CHECKV7M-NEXT: ldrd r2, r0, [sp]
+; CHECKV7M-NEXT: ldrd r1, r0, [sp]
; CHECKV7M-NEXT: beq .LBB0_2
; CHECKV7M-NEXT: @ %bb.1: @ %then
+; CHECKV7M-NEXT: cmp r2, #0
+; CHECKV7M-NEXT: mov r2, r0
+; CHECKV7M-NEXT: it ne
+; CHECKV7M-NEXT: movne r2, r1
; CHECKV7M-NEXT: cmp.w r3, #-1
-; CHECKV7M-NEXT: mov r3, r0
-; CHECKV7M-NEXT: it gt
-; CHECKV7M-NEXT: movgt r3, r2
-; CHECKV7M-NEXT: cmp.w r1, #-1
; CHECKV7M-NEXT: it gt
-; CHECKV7M-NEXT: movgt r0, r2
-; CHECKV7M-NEXT: add r0, r3
+; CHECKV7M-NEXT: movgt r0, r1
+; CHECKV7M-NEXT: add r0, r2
; CHECKV7M-NEXT: bx lr
; CHECKV7M-NEXT: .LBB0_2: @ %else
-; CHECKV7M-NEXT: cmp.w r1, #-1
-; CHECKV7M-NEXT: it gt
-; CHECKV7M-NEXT: movgt r0, r2
+; CHECKV7M-NEXT: cmp r2, #0
+; CHECKV7M-NEXT: it ne
+; CHECKV7M-NEXT: movne r0, r1
; CHECKV7M-NEXT: bx lr
;
; CHECKV7A-LABEL: icmp64_sge_0:
; CHECKV7A: @ %bb.0:
; CHECKV7A-NEXT: ldr r2, [sp, #8]
+; CHECKV7A-NEXT: mvns r1, r1
; CHECKV7A-NEXT: ldrd r12, r0, [sp]
+; CHECKV7A-NEXT: lsrs r1, r1, #31
; CHECKV7A-NEXT: lsls r2, r2, #31
; CHECKV7A-NEXT: beq .LBB0_2
; CHECKV7A-NEXT: @ %bb.1: @ %then
+; CHECKV7A-NEXT: cmp r1, #0
+; CHECKV7A-NEXT: mov r1, r0
+; CHECKV7A-NEXT: it ne
+; CHECKV7A-NEXT: movne r1, r12
; CHECKV7A-NEXT: cmp.w r3, #-1
-; CHECKV7A-NEXT: mov r2, r0
-; CHECKV7A-NEXT: it gt
-; CHECKV7A-NEXT: movgt r2, r12
-; CHECKV7A-NEXT: cmp.w r1, #-1
; CHECKV7A-NEXT: it gt
; CHECKV7A-NEXT: movgt r0, r12
-; CHECKV7A-NEXT: add r0, r2
+; CHECKV7A-NEXT: add r0, r1
; CHECKV7A-NEXT: bx lr
; CHECKV7A-NEXT: .LBB0_2: @ %else
-; CHECKV7A-NEXT: cmp.w r1, #-1
-; CHECKV7A-NEXT: it gt
-; CHECKV7A-NEXT: movgt r0, r12
+; CHECKV7A-NEXT: cmp r1, #0
+; CHECKV7A-NEXT: it ne
+; CHECKV7A-NEXT: movne r0, r12
; CHECKV7A-NEXT: bx lr
br i1 %c, label %then, label %else
then:
diff --git a/llvm/test/CodeGen/RISCV/branch-on-zero.ll b/llvm/test/CodeGen/RISCV/branch-on-zero.ll
index 02aeebdeb3775..5524300fd36be 100644
--- a/llvm/test/CodeGen/RISCV/branch-on-zero.ll
+++ b/llvm/test/CodeGen/RISCV/branch-on-zero.ll
@@ -41,11 +41,12 @@ define i64 @optbranch_64(i64 %Arg) {
; RV32-NEXT: seqz a2, a0
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: or a2, a0, a1
-; RV32-NEXT: bnez a2, .LBB1_2
-; RV32-NEXT: # %bb.1: # %bb2
+; RV32-NEXT: beqz a2, .LBB1_2
+; RV32-NEXT: # %bb.1: # %bb3
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB1_2: # %bb2
; RV32-NEXT: li a0, -1
; RV32-NEXT: li a1, -1
-; RV32-NEXT: .LBB1_2: # %bb3
; RV32-NEXT: ret
;
; RV64-LABEL: optbranch_64:
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index ba6769b2aa3e1..3014c2a524a5e 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -232,22 +232,22 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
ret i64 %Q
}
-; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic.
+; Ensure CGP doesn't sink the compare before we have a chance to form the overflow intrinsic.
define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
; RV32-LABEL: uaddo4:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: andi a4, a4, 1
-; RV32-NEXT: beqz a4, .LBB6_6
-; RV32-NEXT: # %bb.1: # %next
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: add a1, a1, a0
-; RV32-NEXT: beq a3, a1, .LBB6_3
-; RV32-NEXT: # %bb.2: # %next
+; RV32-NEXT: andi a4, a4, 1
+; RV32-NEXT: beq a3, a1, .LBB6_2
+; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a0, a1, a3
-; RV32-NEXT: .LBB6_3: # %next
+; RV32-NEXT: .LBB6_2: # %entry
+; RV32-NEXT: beqz a4, .LBB6_6
+; RV32-NEXT: # %bb.3: # %next
; RV32-NEXT: bnez a0, .LBB6_5
; RV32-NEXT: # %bb.4: # %next
; RV32-NEXT: li a2, 42
@@ -292,19 +292,19 @@ exit:
define i64 @uaddo5(i64 %a, i64 %b, ptr %ptr, i1 %c) nounwind ssp {
; RV32-LABEL: uaddo5:
; RV32: # %bb.0: # %entry
+; RV32-NEXT: add a6, a3, a1
+; RV32-NEXT: add a1, a2, a0
+; RV32-NEXT: sltu a0, a1, a2
+; RV32-NEXT: add a6, a6, a0
; RV32-NEXT: andi a5, a5, 1
-; RV32-NEXT: add a1, a3, a1
-; RV32-NEXT: add a6, a2, a0
-; RV32-NEXT: sltu a0, a6, a2
-; RV32-NEXT: add a1, a1, a0
-; RV32-NEXT: sw a6, 0(a4)
-; RV32-NEXT: sw a1, 4(a4)
+; RV32-NEXT: beq a6, a3, .LBB7_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: sltu a0, a6, a3
+; RV32-NEXT: .LBB7_2: # %entry
+; RV32-NEXT: sw a1, 0(a4)
+; RV32-NEXT: sw a6, 4(a4)
; RV32-NEXT: beqz a5, .LBB7_6
-; RV32-NEXT: # %bb.1: # %next
-; RV32-NEXT: beq a3, a1, .LBB7_3
-; RV32-NEXT: # %bb.2: # %next
-; RV32-NEXT: sltu a0, a1, a3
-; RV32-NEXT: .LBB7_3: # %next
+; RV32-NEXT: # %bb.3: # %next
; RV32-NEXT: bnez a0, .LBB7_5
; RV32-NEXT: # %bb.4: # %next
; RV32-NEXT: li a2, 42
@@ -1076,41 +1076,37 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV32-NEXT: .cfi_offset s4, -24
; RV32-NEXT: .cfi_offset s5, -28
; RV32-NEXT: .cfi_offset s6, -32
-; RV32-NEXT: mv s5, a5
-; RV32-NEXT: mv s3, a1
+; RV32-NEXT: mv s1, a5
+; RV32-NEXT: mv s4, a1
; RV32-NEXT: andi a1, a5, 1
-; RV32-NEXT: beqz a1, .LBB32_8
+; RV32-NEXT: beqz a1, .LBB32_6
; RV32-NEXT: # %bb.1: # %t
; RV32-NEXT: mv s0, a4
-; RV32-NEXT: mv s2, a3
-; RV32-NEXT: mv s1, a2
-; RV32-NEXT: mv s4, a0
-; RV32-NEXT: beq s3, a3, .LBB32_3
+; RV32-NEXT: mv s3, a3
+; RV32-NEXT: mv s2, a2
+; RV32-NEXT: mv s5, a0
+; RV32-NEXT: beq s4, a3, .LBB32_3
; RV32-NEXT: # %bb.2: # %t
-; RV32-NEXT: sltu s6, s3, s2
+; RV32-NEXT: sltu s6, s4, s3
; RV32-NEXT: j .LBB32_4
; RV32-NEXT: .LBB32_3:
-; RV32-NEXT: sltu s6, s4, s1
+; RV32-NEXT: sltu s6, s5, s2
; RV32-NEXT: .LBB32_4: # %t
; RV32-NEXT: mv a0, s6
; RV32-NEXT: call call
-; RV32-NEXT: beqz s6, .LBB32_8
+; RV32-NEXT: beqz s6, .LBB32_6
; RV32-NEXT: # %bb.5: # %end
-; RV32-NEXT: sltu a1, s4, s1
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: beq s3, s2, .LBB32_7
-; RV32-NEXT: # %bb.6: # %end
-; RV32-NEXT: sltu a0, s3, s2
-; RV32-NEXT: .LBB32_7: # %end
-; RV32-NEXT: sub a2, s3, s2
-; RV32-NEXT: sub a3, s4, s1
-; RV32-NEXT: sub a2, a2, a1
-; RV32-NEXT: sw a3, 0(s0)
-; RV32-NEXT: sw a2, 4(s0)
-; RV32-NEXT: j .LBB32_9
-; RV32-NEXT: .LBB32_8: # %f
-; RV32-NEXT: mv a0, s5
-; RV32-NEXT: .LBB32_9: # %f
+; RV32-NEXT: sltu a0, s5, s2
+; RV32-NEXT: sub a1, s4, s3
+; RV32-NEXT: sub a2, s5, s2
+; RV32-NEXT: sub a1, a1, a0
+; RV32-NEXT: sw a2, 0(s0)
+; RV32-NEXT: sw a1, 4(s0)
+; RV32-NEXT: mv a0, s6
+; RV32-NEXT: j .LBB32_7
+; RV32-NEXT: .LBB32_6: # %f
+; RV32-NEXT: mv a0, s1
+; RV32-NEXT: .LBB32_7: # %f
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
index ead79fcf53d8b..67dac88b86560 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
@@ -59,6 +59,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: sltu t5, a0, t6
; RV32-NEXT: sltu t6, a2, t3
; RV32-NEXT: and t5, t5, t6
+; RV32-NEXT: sltu t1, a6, t1
; RV32-NEXT: sltu t4, a0, t4
; RV32-NEXT: sltu t3, a4, t3
; RV32-NEXT: and t3, t4, t3
@@ -66,7 +67,6 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: srli t4, t4, 31
; RV32-NEXT: or t4, t5, t4
; RV32-NEXT: or t5, a1, a5
-; RV32-NEXT: sltu t1, a6, t1
; RV32-NEXT: srli t5, t5, 31
; RV32-NEXT: or t3, t3, t5
; RV32-NEXT: or t3, t4, t3
diff --git a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
index 1962ddebc2115..5a5feaa7734e9 100644
--- a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
+++ b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
@@ -34,16 +34,16 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: .cfi_offset %edi, -16
; CHECK-NEXT: .cfi_offset %ebx, -12
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: Ltmp0:
-; CHECK-NEXT: ## implicit-def: $ebx
+; CHECK-NEXT: Ltmp0: ## EH_LABEL
+; CHECK-NEXT: ## implicit-def: $edi
; CHECK-NEXT: calll __Znam
-; CHECK-NEXT: Ltmp1:
+; CHECK-NEXT: Ltmp1: ## EH_LABEL
; CHECK-NEXT: ## %bb.1: ## %bb11
; CHECK-NEXT: movl %eax, %esi
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: movb $1, %bl
+; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne LBB0_2
; CHECK-NEXT: ## %bb.7: ## %bb31
; CHECK-NEXT: ## implicit-def: $eax
@@ -53,23 +53,20 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: ## Child Loop BB0_13 Depth 2
; CHECK-NEXT: ## Child Loop BB0_16 Depth 3
; CHECK-NEXT: ## Child Loop BB0_21 Depth 2
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne LBB0_9
; CHECK-NEXT: ## %bb.10: ## %bb41
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
-; CHECK-NEXT: Ltmp2:
+; CHECK-NEXT: Ltmp2: ## EH_LABEL
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %esi, (%esp)
; CHECK-NEXT: calll _Pjii
-; CHECK-NEXT: Ltmp3:
+; CHECK-NEXT: Ltmp3: ## EH_LABEL
; CHECK-NEXT: ## %bb.11: ## %bb42
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: decl %eax
-; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne LBB0_18
; CHECK-NEXT: ## %bb.12: ## %bb45.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
@@ -78,8 +75,7 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
; CHECK-NEXT: ## => This Loop Header: Depth=2
; CHECK-NEXT: ## Child Loop BB0_16 Depth 3
-; CHECK-NEXT: movb $1, %cl
-; CHECK-NEXT: testb %cl, %cl
+; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne LBB0_19
; CHECK-NEXT: ## %bb.14: ## %bb48
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2
@@ -88,14 +84,14 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: movl %esi, %edx
-; CHECK-NEXT: movl %edi, %ebx
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
; CHECK-NEXT: LBB0_16: ## %bb49
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
; CHECK-NEXT: ## Parent Loop BB0_13 Depth=2
; CHECK-NEXT: ## => This Inner Loop Header: Depth=3
; CHECK-NEXT: incl %ecx
; CHECK-NEXT: addl $4, %edx
-; CHECK-NEXT: decl %ebx
+; CHECK-NEXT: decl %edi
; CHECK-NEXT: jne LBB0_16
; CHECK-NEXT: LBB0_17: ## %bb57
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2
@@ -107,13 +103,12 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, (%esp)
; CHECK-NEXT: calll ___bzero
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne LBB0_22
; CHECK-NEXT: ## %bb.20: ## %bb61.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; CHECK-NEXT: LBB0_21: ## %bb61
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
@@ -126,24 +121,24 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: decl {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT: jmp LBB0_8
; CHECK-NEXT: LBB0_18: ## %bb43
-; CHECK-NEXT: Ltmp5:
-; CHECK-NEXT: movl %esi, %ebx
+; CHECK-NEXT: Ltmp5: ## EH_LABEL
+; CHECK-NEXT: movl %esi, %edi
; CHECK-NEXT: calll _OnOverFlow
-; CHECK-NEXT: Ltmp6:
+; CHECK-NEXT: Ltmp6: ## EH_LABEL
; CHECK-NEXT: jmp LBB0_3
; CHECK-NEXT: LBB0_2: ## %bb29
-; CHECK-NEXT: Ltmp7:
-; CHECK-NEXT: movl %esi, %ebx
+; CHECK-NEXT: Ltmp7: ## EH_LABEL
+; CHECK-NEXT: movl %esi, %edi
; CHECK-NEXT: calll _OnOverFlow
-; CHECK-NEXT: Ltmp8:
+; CHECK-NEXT: Ltmp8: ## EH_LABEL
; CHECK-NEXT: LBB0_3: ## %bb30
; CHECK-NEXT: ud2
; CHECK-NEXT: LBB0_4: ## %bb20.loopexit
-; CHECK-NEXT: Ltmp4:
+; CHECK-NEXT: Ltmp4: ## EH_LABEL
; CHECK-NEXT: LBB0_9:
-; CHECK-NEXT: movl %esi, %ebx
+; CHECK-NEXT: movl %esi, %edi
; CHECK-NEXT: LBB0_6: ## %bb23
-; CHECK-NEXT: testl %ebx, %ebx
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: addl $28, %esp
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
@@ -151,7 +146,7 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl
; CHECK-NEXT: LBB0_5: ## %bb20.loopexit.split-lp
-; CHECK-NEXT: Ltmp9:
+; CHECK-NEXT: Ltmp9: ## EH_LABEL
; CHECK-NEXT: jmp LBB0_6
; CHECK-NEXT: Lfunc_end0:
bb:
diff --git a/llvm/test/CodeGen/X86/pr166534.ll b/llvm/test/CodeGen/X86/pr166534.ll
index aef44cc3e40d0..162a0c93bfcf4 100644
--- a/llvm/test/CodeGen/X86/pr166534.ll
+++ b/llvm/test/CodeGen/X86/pr166534.ll
@@ -7,100 +7,64 @@
define void @pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) {
; SSE2-LABEL: pr166534:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movq (%rdi), %rax
-; SSE2-NEXT: movq 8(%rdi), %r8
; SSE2-NEXT: movdqu (%rdi), %xmm0
-; SSE2-NEXT: movq (%rsi), %r9
-; SSE2-NEXT: movq 8(%rsi), %rdi
; SSE2-NEXT: movdqu (%rsi), %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %esi
-; SSE2-NEXT: xorl %r10d, %r10d
+; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
-; SSE2-NEXT: sete %r10b
-; SSE2-NEXT: orq %r10, (%rdx)
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: orq %rax, (%rdx)
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
; SSE2-NEXT: jne .LBB0_2
; SSE2-NEXT: # %bb.1: # %if.then
-; SSE2-NEXT: xorq %r9, %rax
-; SSE2-NEXT: xorq %rdi, %r8
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: orq %rax, %r8
-; SSE2-NEXT: sete %dl
-; SSE2-NEXT: orq %rdx, (%rcx)
+; SSE2-NEXT: orq %rax, (%rcx)
; SSE2-NEXT: .LBB0_2: # %if.end
; SSE2-NEXT: retq
;
; SSE4-LABEL: pr166534:
; SSE4: # %bb.0: # %entry
-; SSE4-NEXT: movq (%rdi), %rax
-; SSE4-NEXT: movq 8(%rdi), %r8
; SSE4-NEXT: movdqu (%rdi), %xmm0
-; SSE4-NEXT: movq (%rsi), %r9
-; SSE4-NEXT: movq 8(%rsi), %rdi
; SSE4-NEXT: movdqu (%rsi), %xmm1
; SSE4-NEXT: pxor %xmm0, %xmm1
-; SSE4-NEXT: xorl %esi, %esi
+; SSE4-NEXT: xorl %eax, %eax
; SSE4-NEXT: ptest %xmm1, %xmm1
-; SSE4-NEXT: sete %sil
-; SSE4-NEXT: orq %rsi, (%rdx)
+; SSE4-NEXT: sete %al
+; SSE4-NEXT: orq %rax, (%rdx)
; SSE4-NEXT: ptest %xmm1, %xmm1
; SSE4-NEXT: jne .LBB0_2
; SSE4-NEXT: # %bb.1: # %if.then
-; SSE4-NEXT: xorq %r9, %rax
-; SSE4-NEXT: xorq %rdi, %r8
-; SSE4-NEXT: xorl %edx, %edx
-; SSE4-NEXT: orq %rax, %r8
-; SSE4-NEXT: sete %dl
-; SSE4-NEXT: orq %rdx, (%rcx)
+; SSE4-NEXT: orq %rax, (%rcx)
; SSE4-NEXT: .LBB0_2: # %if.end
; SSE4-NEXT: retq
;
; AVX2-LABEL: pr166534:
; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: movq (%rdi), %rax
-; AVX2-NEXT: movq 8(%rdi), %r8
; AVX2-NEXT: vmovdqu (%rdi), %xmm0
-; AVX2-NEXT: movq (%rsi), %rdi
; AVX2-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; AVX2-NEXT: movq 8(%rsi), %rsi
-; AVX2-NEXT: xorl %r9d, %r9d
+; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: vptest %xmm0, %xmm0
-; AVX2-NEXT: sete %r9b
-; AVX2-NEXT: orq %r9, (%rdx)
+; AVX2-NEXT: sete %al
+; AVX2-NEXT: orq %rax, (%rdx)
; AVX2-NEXT: vptest %xmm0, %xmm0
; AVX2-NEXT: jne .LBB0_2
; AVX2-NEXT: # %bb.1: # %if.then
-; AVX2-NEXT: xorq %rdi, %rax
-; AVX2-NEXT: xorq %rsi, %r8
-; AVX2-NEXT: xorl %edx, %edx
-; AVX2-NEXT: orq %rax, %r8
-; AVX2-NEXT: sete %dl
-; AVX2-NEXT: orq %rdx, (%rcx)
+; AVX2-NEXT: orq %rax, (%rcx)
; AVX2-NEXT: .LBB0_2: # %if.end
; AVX2-NEXT: retq
;
; AVX512-LABEL: pr166534:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: movq (%rdi), %rax
-; AVX512-NEXT: movq 8(%rdi), %r8
; AVX512-NEXT: vmovdqu (%rdi), %xmm0
-; AVX512-NEXT: movq (%rsi), %r9
-; AVX512-NEXT: movq 8(%rsi), %rdi
; AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0
-; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: xorl %eax, %eax
; AVX512-NEXT: vptest %xmm0, %xmm0
-; AVX512-NEXT: sete %sil
-; AVX512-NEXT: orq %rsi, (%rdx)
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: orq %rax, (%rdx)
; AVX512-NEXT: vptest %xmm0, %xmm0
; AVX512-NEXT: jne .LBB0_2
; AVX512-NEXT: # %bb.1: # %if.then
-; AVX512-NEXT: xorq %r9, %rax
-; AVX512-NEXT: xorq %rdi, %r8
-; AVX512-NEXT: xorl %edx, %edx
-; AVX512-NEXT: orq %rax, %r8
-; AVX512-NEXT: sete %dl
-; AVX512-NEXT: orq %rdx, (%rcx)
+; AVX512-NEXT: orq %rax, (%rcx)
; AVX512-NEXT: .LBB0_2: # %if.end
; AVX512-NEXT: retq
entry:
More information about the llvm-commits
mailing list