[clang] [llvm] [ValueTracking] Extend computeConstantRange for add/sub, sext/zext/trunc (PR #181110)
Guy David via cfe-commits
cfe-commits at lists.llvm.org
Sun Feb 15 11:28:35 PST 2026
https://github.com/guy-david updated https://github.com/llvm/llvm-project/pull/181110
>From 9250d4e7e4f5274db96fab5ee8faa76a594178b9 Mon Sep 17 00:00:00 2001
From: Guy David <guyda96 at gmail.com>
Date: Wed, 11 Feb 2026 16:20:41 +0200
Subject: [PATCH 1/2] [ValueTracking] Extend computeConstantRange for add/sub,
sext/zext/trunc
Recursively compute operand ranges for add/sub and propagate ranges
through sext/zext/trunc.
For add/sub, the computed range is intersected with any existing range
from setLimitsForBinOp, and NSW/NUW flags are used via addWithNoWrap/
subWithNoWrap to tighten bounds.
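For example (an illustrative fragment mirroring the new unit test, not
part of the patch itself): %ext below is known to lie in [-128, 127],
so with the nsw flag the add is known to lie in [-118, 137], which a
range-based icmp fold can exploit:

define i1 @cmp_example(i8 %x) {
  %ext = sext i8 %x to i32         ; range [-128, 127]
  %add = add nsw i32 %ext, 10      ; range [-118, 137] with this patch
  %cmp = icmp slt i32 %add, 1000   ; always true given that range
  ret i1 %cmp
}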
The motivation is to enable further folding of reduce.add expressions
in comparisons, where the result range can be bounded by the input
element ranges.
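As a sketch of the kind of fold this aims to enable (a hypothetical
example; the patch itself only extends the range computation, and
whether the final fold happens depends on the pass consuming
computeConstantRange): each element of %v is in [0, 255], so the sum of
four elements is at most 1020 and the compare can fold to true:

define i1 @sum_bounded(<4 x i8> %v) {
  %ext = zext <4 x i8> %v to <4 x i32>
  %sum = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %ext)
  %cmp = icmp ult i32 %sum, 1024   ; foldable once %sum is bounded
  ret i1 %cmp
}
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)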
Compile-time impact on llvm-test-suite is <0.1% (mean).
---
llvm/lib/Analysis/ValueTracking.cpp | 28 ++++++
llvm/test/Analysis/BasicAA/range.ll | 66 +++++++++++++
llvm/unittests/Analysis/ValueTrackingTest.cpp | 92 +++++++++++++++++++
3 files changed, 186 insertions(+)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 8761b7bcb51a2..acbb50b8cae53 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -10242,6 +10242,34 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
// TODO: Return ConstantRange.
setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned);
CR = ConstantRange::getNonEmpty(Lower, Upper);
+ if (BO->getOpcode() == Instruction::Add ||
+ BO->getOpcode() == Instruction::Sub) {
+ ConstantRange LHS = computeConstantRange(
+ BO->getOperand(0), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
+ ConstantRange RHS = computeConstantRange(
+ BO->getOperand(1), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
+ unsigned NoWrapKind = 0;
+ if (IIQ.hasNoUnsignedWrap(BO))
+ NoWrapKind |= OverflowingBinaryOperator::NoUnsignedWrap;
+ if (IIQ.hasNoSignedWrap(BO))
+ NoWrapKind |= OverflowingBinaryOperator::NoSignedWrap;
+ ConstantRange OpCR = BO->getOpcode() == Instruction::Add
+ ? LHS.addWithNoWrap(RHS, NoWrapKind)
+ : LHS.subWithNoWrap(RHS, NoWrapKind);
+ CR = CR.intersectWith(OpCR);
+ }
+ } else if (auto *SExt = dyn_cast<SExtInst>(V)) {
+ CR = computeConstantRange(SExt->getOperand(0), ForSigned, UseInstrInfo, AC,
+ CtxI, DT, Depth + 1)
+ .signExtend(BitWidth);
+ } else if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
+ CR = computeConstantRange(ZExt->getOperand(0), ForSigned, UseInstrInfo, AC,
+ CtxI, DT, Depth + 1)
+ .zeroExtend(BitWidth);
+ } else if (auto *Trunc = dyn_cast<TruncInst>(V)) {
+ CR = computeConstantRange(Trunc->getOperand(0), ForSigned, UseInstrInfo, AC,
+ CtxI, DT, Depth + 1)
+ .truncate(BitWidth);
} else if (auto *II = dyn_cast<IntrinsicInst>(V))
CR = getRangeForIntrinsic(*II, UseInstrInfo);
else if (auto *SI = dyn_cast<SelectInst>(V)) {
diff --git a/llvm/test/Analysis/BasicAA/range.ll b/llvm/test/Analysis/BasicAA/range.ll
index e5dfb60c8b878..a41fd63ee52f6 100644
--- a/llvm/test/Analysis/BasicAA/range.ll
+++ b/llvm/test/Analysis/BasicAA/range.ll
@@ -271,6 +271,72 @@ entry:
ret i32 %load_
}
+; CHECK-LABEL: Function: zext_propagate_range
+; CHECK: NoAlias: i32* %gep, i32* %gep128
+define void @zext_propagate_range(ptr %p, i8 %idx) {
+ %narrow = and i8 %idx, 127
+ %wide = zext i8 %narrow to i64
+ %gep = getelementptr i32, ptr %p, i64 %wide
+ %gep128 = getelementptr i32, ptr %p, i64 128
+ load i32, ptr %gep
+ load i32, ptr %gep128
+ ret void
+}
+
+; CHECK-LABEL: Function: sext_propagate_range
+; CHECK: NoAlias: i32* %gep, i32* %gep128
+define void @sext_propagate_range(ptr %p, i8 %idx) {
+ %clamped = and i8 %idx, 100
+ %wide = sext i8 %clamped to i64
+ %gep = getelementptr i32, ptr %p, i64 %wide
+ %gep128 = getelementptr i32, ptr %p, i64 128
+ load i32, ptr %gep
+ load i32, ptr %gep128
+ ret void
+}
+
+; CHECK-LABEL: Function: zext_add_range
+; CHECK: NoAlias: i32* %gep, i32* %gep512
+define void @zext_add_range(ptr %p, i8 %x, i8 %y) {
+ %ext.x = zext i8 %x to i64
+ %ext.y = zext i8 %y to i64
+ %sum = add i64 %ext.x, %ext.y
+ %gep = getelementptr i32, ptr %p, i64 %sum
+ %gep512 = getelementptr i32, ptr %p, i64 512
+ load i32, ptr %gep
+ load i32, ptr %gep512
+ ret void
+}
+
+; CHECK-LABEL: Function: zext_sub_range
+; CHECK: NoAlias: i32* %gep, i32* %gep256
+; CHECK: NoAlias: i32* %gep, i32* %gepneg256
+define void @zext_sub_range(ptr %p, i8 %x, i8 %y) {
+ %ext.x = zext i8 %x to i64
+ %ext.y = zext i8 %y to i64
+ %diff = sub i64 %ext.x, %ext.y
+ %gep = getelementptr i32, ptr %p, i64 %diff
+ %gep256 = getelementptr i32, ptr %p, i64 256
+ %gepneg256 = getelementptr i32, ptr %p, i64 -256
+ load i32, ptr %gep
+ load i32, ptr %gep256
+ load i32, ptr %gepneg256
+ ret void
+}
+
+; CHECK-LABEL: Function: trunc_propagate_range
+; CHECK: NoAlias: i32* %gep, i32* %gep64
+define void @trunc_propagate_range(ptr %p, i64 %idx) {
+ %clamped = and i64 %idx, 63
+ %narrow = trunc i64 %clamped to i8
+ %wide = zext i8 %narrow to i64
+ %gep = getelementptr i32, ptr %p, i64 %wide
+ %gep64 = getelementptr i32, ptr %p, i64 64
+ load i32, ptr %gep
+ load i32, ptr %gep64
+ ret void
+}
+
declare void @llvm.assume(i1)
!0 = !{ i32 0, i32 2 }
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp
index 6229d408de2a8..2ee45dccc6595 100644
--- a/llvm/unittests/Analysis/ValueTrackingTest.cpp
+++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp
@@ -3394,6 +3394,98 @@ TEST_F(ValueTrackingTest, ComputeConstantRange) {
// If we don't know the value of x.2, we don't know the value of x.1.
EXPECT_TRUE(CR1.isFullSet());
}
+ {
+ auto M = parseModule(R"(
+ define void @test(i8 %x) {
+ %sext = sext i8 %x to i32
+ %zext = zext i8 %x to i32
+ ret void
+ })");
+ Function *F = M->getFunction("test");
+ AssumptionCache AC(*F);
+ Instruction *SExt = &findInstructionByName(F, "sext");
+ Instruction *ZExt = &findInstructionByName(F, "zext");
+ ConstantRange SExtCR = computeConstantRange(SExt, true, true, &AC, SExt);
+ EXPECT_EQ(SExtCR.getSignedMin().getSExtValue(), -128);
+ EXPECT_EQ(SExtCR.getSignedMax().getSExtValue(), 127);
+ ConstantRange ZExtCR = computeConstantRange(ZExt, false, true, &AC, ZExt);
+ EXPECT_EQ(ZExtCR.getUnsignedMin().getZExtValue(), 0u);
+ EXPECT_EQ(ZExtCR.getUnsignedMax().getZExtValue(), 255u);
+ }
+ {
+ auto M = parseModule(R"(
+ define i32 @test(i8 %x) {
+ %ext = sext i8 %x to i32
+ %add = add nsw i32 %ext, 10
+ ret i32 %add
+ })");
+ Function *F = M->getFunction("test");
+ AssumptionCache AC(*F);
+ Instruction *Add = &findInstructionByName(F, "add");
+ ConstantRange CR = computeConstantRange(Add, true, true, &AC, Add);
+ EXPECT_EQ(CR.getSignedMin().getSExtValue(), -118);
+ EXPECT_EQ(CR.getSignedMax().getSExtValue(), 137);
+ }
+ {
+ auto M = parseModule(R"(
+ define i32 @test(i8 %x, i8 %y) {
+ %ext.x = zext i8 %x to i32
+ %ext.y = zext i8 %y to i32
+ %sub = sub i32 %ext.x, %ext.y
+ ret i32 %sub
+ })");
+ Function *F = M->getFunction("test");
+ AssumptionCache AC(*F);
+ Instruction *Sub = &findInstructionByName(F, "sub");
+ ConstantRange CR = computeConstantRange(Sub, true, true, &AC, Sub);
+ EXPECT_EQ(CR.getSignedMin().getSExtValue(), -255);
+ EXPECT_EQ(CR.getSignedMax().getSExtValue(), 255);
+ }
+ {
+ // trunc
+ auto M = parseModule(R"(
+ define void @test(i32 %x) {
+ %narrow = trunc i32 %x to i8
+ ret void
+ })");
+ Function *F = M->getFunction("test");
+ AssumptionCache AC(*F);
+ Instruction *Trunc = &findInstructionByName(F, "narrow");
+ ConstantRange CR = computeConstantRange(Trunc, false, true, &AC, Trunc);
+ EXPECT_TRUE(CR.isFullSet());
+ EXPECT_EQ(CR.getBitWidth(), 8u);
+ }
+ {
+ // trunc with restricted input range
+ auto M = parseModule(R"(
+ define i8 @test(i32 %x) {
+ %clamped = and i32 %x, 127
+ %narrow = trunc i32 %clamped to i8
+ ret i8 %narrow
+ })");
+ Function *F = M->getFunction("test");
+ AssumptionCache AC(*F);
+ Instruction *Trunc = &findInstructionByName(F, "narrow");
+ ConstantRange CR = computeConstantRange(Trunc, false, true, &AC, Trunc);
+ EXPECT_EQ(CR.getUnsignedMin().getZExtValue(), 0u);
+ EXPECT_EQ(CR.getUnsignedMax().getZExtValue(), 127u);
+ }
+ {
+ // Chained adds from i1
+ auto M = parseModule(R"(
+ define i32 @test(i1 %x) {
+ %ext = sext i1 %x to i32
+ %add1 = add nsw i32 %ext, %ext
+ %add2 = add nsw i32 %add1, %ext
+ ret i32 %add2
+ })");
+ Function *F = M->getFunction("test");
+ AssumptionCache AC(*F);
+ Instruction *Add2 = &findInstructionByName(F, "add2");
+ ConstantRange CR = computeConstantRange(Add2, true, true, &AC, Add2);
+ EXPECT_EQ(CR.getSignedMin().getSExtValue(), -3);
+ EXPECT_EQ(CR.getSignedMax().getSExtValue(), 0);
+ }
}
struct FindAllocaForValueTestParams {
>From 4bfbb084f7253f3b848e11800413e2e7e9bab35d Mon Sep 17 00:00:00 2001
From: Guy David <guyda96 at gmail.com>
Date: Thu, 12 Feb 2026 13:28:29 +0200
Subject: [PATCH 2/2] Address comments
---
clang/test/CodeGen/isfpclass.c | 4 +-
clang/test/Headers/__clang_hip_math.hip | 48 +-
clang/test/Headers/wasm.c | 32 +-
llvm/lib/Analysis/ValueTracking.cpp | 100 ++--
llvm/test/CodeGen/AMDGPU/div_v2i128.ll | 434 ++++++++----------
llvm/test/CodeGen/AMDGPU/sdiv64.ll | 27 +-
llvm/test/CodeGen/AMDGPU/srem64.ll | 95 ++--
llvm/test/CodeGen/AMDGPU/udiv64.ll | 34 +-
llvm/test/CodeGen/AMDGPU/urem64.ll | 50 +-
llvm/test/CodeGen/PowerPC/add_cmp.ll | 12 +-
llvm/test/Transforms/Attributor/range.ll | 20 +-
llvm/test/Transforms/InstCombine/add.ll | 4 +-
llvm/test/Transforms/InstCombine/fls.ll | 2 +-
llvm/test/Transforms/InstCombine/icmp-add.ll | 3 +-
llvm/test/Transforms/InstCombine/pr80597.ll | 9 +-
llvm/test/Transforms/InstCombine/sadd_sat.ll | 10 +-
.../InstCombine/saturating-add-sub.ll | 5 +-
llvm/unittests/Analysis/ValueTrackingTest.cpp | 15 +
18 files changed, 417 insertions(+), 487 deletions(-)
diff --git a/clang/test/CodeGen/isfpclass.c b/clang/test/CodeGen/isfpclass.c
index 4c6d556e008e5..1465b43149fcc 100644
--- a/clang/test/CodeGen/isfpclass.c
+++ b/clang/test/CodeGen/isfpclass.c
@@ -136,7 +136,7 @@ typedef double __attribute__((ext_vector_type(4))) double4;
typedef int __attribute__((ext_vector_type(4))) int4;
typedef long __attribute__((ext_vector_type(4))) long4;
-// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_v4f32(
+// CHECK-LABEL: define dso_local noundef range(i32 0, 2) <4 x i32> @check_isfpclass_nan_v4f32(
// CHECK-SAME: <4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = fcmp uno <4 x float> [[X]], zeroinitializer
@@ -147,7 +147,7 @@ int4 check_isfpclass_nan_v4f32(float4 x) {
return __builtin_isfpclass(x, 3 /*NaN*/);
}
-// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32(
+// CHECK-LABEL: define dso_local noundef range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32(
// CHECK-SAME: <4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> [[X]], i32 3) #[[ATTR5]]
diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index 68a8666e41856..0a9c757aabf55 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -2653,7 +2653,7 @@ extern "C" __device__ int test_ilogb(double x) {
return ilogb(x);
}
-// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef(
+// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef(
// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
@@ -2666,7 +2666,7 @@ extern "C" __device__ int test_ilogb(double x) {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret i32 1
//
-// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef(
+// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef(
// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
@@ -2674,7 +2674,7 @@ extern "C" __device__ int test_ilogb(double x) {
// APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// APPROX-NEXT: ret i32 [[CONV]]
//
-// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef(
+// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef(
// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
@@ -2682,7 +2682,7 @@ extern "C" __device__ int test_ilogb(double x) {
// NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// NCRDIV-NEXT: ret i32 [[CONV]]
//
-// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___finitef(
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___finitef(
// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X]])
@@ -2694,7 +2694,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) {
return __finitef(x);
}
-// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___finite(
+// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite(
// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
@@ -2707,7 +2707,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret i32 1
//
-// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___finite(
+// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite(
// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
@@ -2715,7 +2715,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) {
// APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// APPROX-NEXT: ret i32 [[CONV]]
//
-// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___finite(
+// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite(
// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
@@ -2723,7 +2723,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) {
// NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// NCRDIV-NEXT: ret i32 [[CONV]]
//
-// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___finite(
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___finite(
// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X]])
@@ -2735,7 +2735,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) {
return __finite(x);
}
-// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff(
+// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff(
// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
@@ -2748,7 +2748,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret i32 0
//
-// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff(
+// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff(
// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
@@ -2756,7 +2756,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) {
// APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// APPROX-NEXT: ret i32 [[CONV]]
//
-// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff(
+// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff(
// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
@@ -2764,7 +2764,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) {
// NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// NCRDIV-NEXT: ret i32 [[CONV]]
//
-// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isinff(
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___isinff(
// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X]])
@@ -2776,7 +2776,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) {
return __isinff(x);
}
-// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf(
+// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf(
// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
@@ -2789,7 +2789,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret i32 0
//
-// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf(
+// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf(
// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
@@ -2797,7 +2797,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) {
// APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// APPROX-NEXT: ret i32 [[CONV]]
//
-// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf(
+// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf(
// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]])
@@ -2805,7 +2805,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) {
// NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32
// NCRDIV-NEXT: ret i32 [[CONV]]
//
-// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isinf(
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___isinf(
// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X]])
@@ -2817,7 +2817,7 @@ extern "C" __device__ BOOL_TYPE test___isinf(double x) {
return __isinf(x);
}
-// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf(
+// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf(
// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00
@@ -2829,21 +2829,21 @@ extern "C" __device__ BOOL_TYPE test___isinf(double x) {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret i32 0
//
-// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf(
+// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf(
// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00
// APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32
// APPROX-NEXT: ret i32 [[CONV]]
//
-// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf(
+// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf(
// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00
// NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32
// NCRDIV-NEXT: ret i32 [[CONV]]
//
-// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isnanf(
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___isnanf(
// AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00
@@ -2854,7 +2854,7 @@ extern "C" __device__ BOOL_TYPE test___isnanf(float x) {
return __isnanf(x);
}
-// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan(
+// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan(
// DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00
@@ -2866,21 +2866,21 @@ extern "C" __device__ BOOL_TYPE test___isnanf(float x) {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
// FINITEONLY-NEXT: ret i32 0
//
-// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan(
+// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan(
// APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
// APPROX-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00
// APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32
// APPROX-NEXT: ret i32 [[CONV]]
//
-// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan(
+// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan(
// NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// NCRDIV-NEXT: [[ENTRY:.*:]]
// NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00
// NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32
// NCRDIV-NEXT: ret i32 [[CONV]]
//
-// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isnan(
+// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___isnan(
// AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] {
// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00
diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c
index 2545a014e4340..fdce091fe640e 100644
--- a/clang/test/Headers/wasm.c
+++ b/clang/test/Headers/wasm.c
@@ -1234,7 +1234,7 @@ v128_t test_u16x8_ge(v128_t a, v128_t b) {
return wasm_u16x8_ge(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_eq(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_eq(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[A]], [[B]]
@@ -1245,7 +1245,7 @@ v128_t test_i32x4_eq(v128_t a, v128_t b) {
return wasm_i32x4_eq(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_ne(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_ne(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <4 x i32> [[A]], [[B]]
@@ -1256,7 +1256,7 @@ v128_t test_i32x4_ne(v128_t a, v128_t b) {
return wasm_i32x4_ne(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_lt(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_lt(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <4 x i32> [[A]], [[B]]
@@ -1267,7 +1267,7 @@ v128_t test_i32x4_lt(v128_t a, v128_t b) {
return wasm_i32x4_lt(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_lt(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_u32x4_lt(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <4 x i32> [[A]], [[B]]
@@ -1278,7 +1278,7 @@ v128_t test_u32x4_lt(v128_t a, v128_t b) {
return wasm_u32x4_lt(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_gt(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_gt(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A]], [[B]]
@@ -1289,7 +1289,7 @@ v128_t test_i32x4_gt(v128_t a, v128_t b) {
return wasm_i32x4_gt(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_gt(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_u32x4_gt(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A]], [[B]]
@@ -1300,7 +1300,7 @@ v128_t test_u32x4_gt(v128_t a, v128_t b) {
return wasm_u32x4_gt(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_le(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_le(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <4 x i32> [[A]], [[B]]
@@ -1311,7 +1311,7 @@ v128_t test_i32x4_le(v128_t a, v128_t b) {
return wasm_i32x4_le(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_le(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_u32x4_le(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <4 x i32> [[A]], [[B]]
@@ -1322,7 +1322,7 @@ v128_t test_u32x4_le(v128_t a, v128_t b) {
return wasm_u32x4_le(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_ge(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_ge(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <4 x i32> [[A]], [[B]]
@@ -1333,7 +1333,7 @@ v128_t test_i32x4_ge(v128_t a, v128_t b) {
return wasm_i32x4_ge(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_ge(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_u32x4_ge(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <4 x i32> [[A]], [[B]]
@@ -1428,7 +1428,7 @@ v128_t test_i64x2_ge(v128_t a, v128_t b) {
return wasm_i64x2_ge(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_eq(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_eq(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
@@ -1441,7 +1441,7 @@ v128_t test_f32x4_eq(v128_t a, v128_t b) {
return wasm_f32x4_eq(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_ne(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_ne(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
@@ -1454,7 +1454,7 @@ v128_t test_f32x4_ne(v128_t a, v128_t b) {
return wasm_f32x4_ne(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_lt(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_lt(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
@@ -1467,7 +1467,7 @@ v128_t test_f32x4_lt(v128_t a, v128_t b) {
return wasm_f32x4_lt(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_gt(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_gt(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
@@ -1480,7 +1480,7 @@ v128_t test_f32x4_gt(v128_t a, v128_t b) {
return wasm_f32x4_gt(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_le(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_le(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
@@ -1493,7 +1493,7 @@ v128_t test_f32x4_le(v128_t a, v128_t b) {
return wasm_f32x4_le(a, b);
}
-// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_ge(
+// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_ge(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float>
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index acbb50b8cae53..62621dd87441c 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -9818,10 +9818,14 @@ std::optional<bool> llvm::isImpliedByDomCondition(CmpPredicate Pred,
return std::nullopt;
}
-static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
- APInt &Upper, const InstrInfoQuery &IIQ,
- bool PreferSignedRange) {
- unsigned Width = Lower.getBitWidth();
+static ConstantRange getRangeForBinOp(const BinaryOperator &BO, bool ForSigned,
+ bool UseInstrInfo, AssumptionCache *AC,
+ const Instruction *CtxI,
+ const DominatorTree *DT, unsigned Depth) {
+ unsigned Width = BO.getType()->getScalarSizeInBits();
+ InstrInfoQuery IIQ(UseInstrInfo);
+ APInt Lower = APInt(Width, 0);
+ APInt Upper = APInt(Width, 0);
const APInt *C;
switch (BO.getOpcode()) {
case Instruction::Sub:
@@ -9834,7 +9838,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
// is never larger than the signed range. Example:
// "sub nuw nsw i8 -2, x" is unsigned [0, 254] vs. signed [-128, 126].
// "sub nuw nsw i8 2, x" is unsigned [0, 2] vs. signed [-125, 127].
- if (PreferSignedRange && HasNSW && HasNUW)
+ if (ForSigned && HasNSW && HasNUW)
HasNUW = false;
if (HasNUW) {
@@ -9863,7 +9867,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
// range. Otherwise if both no-wraps are set, use the unsigned range
// because it is never larger than the signed range. Example: "add nuw
// nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
- if (PreferSignedRange && HasNSW && HasNUW)
+ if (ForSigned && HasNSW && HasNUW)
HasNUW = false;
if (HasNUW) {
@@ -10041,6 +10045,34 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
default:
break;
}
+
+ ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper);
+ bool IsDisjointOr = BO.getOpcode() == Instruction::Or &&
+ cast<PossiblyDisjointInst>(&BO)->isDisjoint();
+ if (BO.getOpcode() == Instruction::Add ||
+ BO.getOpcode() == Instruction::Sub || IsDisjointOr) {
+ // Limit recursion depth more aggressively for binary operations.
+ unsigned NewDepth = std::max(Depth * 2, 1u);
+ ConstantRange LHS = computeConstantRange(
+ BO.getOperand(0), ForSigned, UseInstrInfo, AC, CtxI, DT, NewDepth);
+ ConstantRange RHS = computeConstantRange(
+ BO.getOperand(1), ForSigned, UseInstrInfo, AC, CtxI, DT, NewDepth);
+ unsigned NoWrapKind = 0;
+ // Only Add and Sub have no-wrap flags, not disjoint Or.
+ if (!IsDisjointOr) {
+ if (IIQ.hasNoUnsignedWrap(&BO))
+ NoWrapKind |= OverflowingBinaryOperator::NoUnsignedWrap;
+ if (IIQ.hasNoSignedWrap(&BO))
+ NoWrapKind |= OverflowingBinaryOperator::NoSignedWrap;
+ }
+ // Disjoint OR is semantically equivalent to Add.
+ ConstantRange OpCR = BO.getOpcode() == Instruction::Sub
+ ? LHS.subWithNoWrap(RHS, NoWrapKind)
+ : LHS.addWithNoWrap(RHS, NoWrapKind);
+ CR = CR.intersectWith(OpCR, ForSigned ? ConstantRange::Signed
+ : ConstantRange::Unsigned);
+ }
+ return CR;
}
static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II,
@@ -10237,39 +10269,31 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
InstrInfoQuery IIQ(UseInstrInfo);
ConstantRange CR = ConstantRange::getFull(BitWidth);
if (auto *BO = dyn_cast<BinaryOperator>(V)) {
+ CR = getRangeForBinOp(*BO, ForSigned, UseInstrInfo, AC, CtxI, DT, Depth);
+ } else if (isa<SExtInst>(V) || isa<ZExtInst>(V) || isa<TruncInst>(V)) {
+ auto *CastOp = cast<CastInst>(V);
+ ConstantRange OpCR =
+ computeConstantRange(CastOp->getOperand(0), ForSigned, UseInstrInfo, AC,
+ CtxI, DT, Depth + 1);
+ switch (CastOp->getOpcode()) {
+ case Instruction::SExt:
+ CR = OpCR.signExtend(BitWidth);
+ break;
+ case Instruction::ZExt:
+ CR = OpCR.zeroExtend(BitWidth);
+ break;
+ case Instruction::Trunc:
+ CR = OpCR.truncate(BitWidth);
+ break;
+ default:
+ llvm_unreachable("Unexpected cast opcode");
+ }
+ } else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) {
APInt Lower = APInt(BitWidth, 0);
APInt Upper = APInt(BitWidth, 0);
// TODO: Return ConstantRange.
- setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned);
+ setLimitForFPToI(cast<Instruction>(V), Lower, Upper);
CR = ConstantRange::getNonEmpty(Lower, Upper);
- if (BO->getOpcode() == Instruction::Add ||
- BO->getOpcode() == Instruction::Sub) {
- ConstantRange LHS = computeConstantRange(
- BO->getOperand(0), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
- ConstantRange RHS = computeConstantRange(
- BO->getOperand(1), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
- unsigned NoWrapKind = 0;
- if (IIQ.hasNoUnsignedWrap(BO))
- NoWrapKind |= OverflowingBinaryOperator::NoUnsignedWrap;
- if (IIQ.hasNoSignedWrap(BO))
- NoWrapKind |= OverflowingBinaryOperator::NoSignedWrap;
- ConstantRange OpCR = BO->getOpcode() == Instruction::Add
- ? LHS.addWithNoWrap(RHS, NoWrapKind)
- : LHS.subWithNoWrap(RHS, NoWrapKind);
- CR = CR.intersectWith(OpCR);
- }
- } else if (auto *SExt = dyn_cast<SExtInst>(V)) {
- CR = computeConstantRange(SExt->getOperand(0), ForSigned, UseInstrInfo, AC,
- CtxI, DT, Depth + 1)
- .signExtend(BitWidth);
- } else if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
- CR = computeConstantRange(ZExt->getOperand(0), ForSigned, UseInstrInfo, AC,
- CtxI, DT, Depth + 1)
- .zeroExtend(BitWidth);
- } else if (auto *Trunc = dyn_cast<TruncInst>(V)) {
- CR = computeConstantRange(Trunc->getOperand(0), ForSigned, UseInstrInfo, AC,
- CtxI, DT, Depth + 1)
- .truncate(BitWidth);
} else if (auto *II = dyn_cast<IntrinsicInst>(V))
CR = getRangeForIntrinsic(*II, UseInstrInfo);
else if (auto *SI = dyn_cast<SelectInst>(V)) {
@@ -10279,12 +10303,6 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
SI->getFalseValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
CR = CRTrue.unionWith(CRFalse);
CR = CR.intersectWith(getRangeForSelectPattern(*SI, IIQ));
- } else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) {
- APInt Lower = APInt(BitWidth, 0);
- APInt Upper = APInt(BitWidth, 0);
- // TODO: Return ConstantRange.
- setLimitForFPToI(cast<Instruction>(V), Lower, Upper);
- CR = ConstantRange::getNonEmpty(Lower, Upper);
} else if (const auto *A = dyn_cast<Argument>(V))
if (std::optional<ConstantRange> Range = A->getRange())
CR = *Range;
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 52410c6d3698e..97b460b32507b 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -857,28 +857,23 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-NEXT: v_sub_i32_e32 v2, vcc, 0x5e, v0
; SDAG-NEXT: v_subb_u32_e32 v3, vcc, 0, v3, vcc
-; SDAG-NEXT: v_xor_b32_e32 v0, 0x7f, v2
; SDAG-NEXT: v_subb_u32_e32 v8, vcc, 0, v9, vcc
; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[2:3]
-; SDAG-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7]
; SDAG-NEXT: v_subb_u32_e32 v9, vcc, 0, v9, vcc
-; SDAG-NEXT: v_or_b32_e32 v0, v0, v8
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
-; SDAG-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v1, v3, v9
+; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
-; SDAG-NEXT: v_cndmask_b32_e32 v14, v15, v14, vcc
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; SDAG-NEXT: v_and_b32_e32 v0, 1, v14
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v0
-; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; SDAG-NEXT: v_cndmask_b32_e64 v1, v11, 0, s[4:5]
-; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
+; SDAG-NEXT: s_xor_b64 s[8:9], s[4:5], -1
; SDAG-NEXT: v_cndmask_b32_e64 v0, v10, 0, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v16, v13, 0, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v17, v12, 0, s[4:5]
-; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc
-; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
; SDAG-NEXT: s_cbranch_execz .LBB1_6
; SDAG-NEXT: ; %bb.1: ; %udiv-bb15
; SDAG-NEXT: v_add_i32_e32 v20, vcc, 1, v2
@@ -1015,28 +1010,23 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
; SDAG-NEXT: v_sub_i32_e32 v2, vcc, 0x5e, v2
; SDAG-NEXT: v_subb_u32_e32 v3, vcc, 0, v4, vcc
-; SDAG-NEXT: v_xor_b32_e32 v6, 0x7f, v2
; SDAG-NEXT: v_subb_u32_e32 v4, vcc, 0, v12, vcc
; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[2:3]
-; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[6:7]
; SDAG-NEXT: v_subb_u32_e32 v5, vcc, 0, v12, vcc
-; SDAG-NEXT: v_or_b32_e32 v6, v6, v4
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v7, v3, v5
+; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
-; SDAG-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7]
-; SDAG-NEXT: v_and_b32_e32 v6, 1, v12
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v6
-; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
+; SDAG-NEXT: v_and_b32_e32 v6, 1, v6
+; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
+; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; SDAG-NEXT: v_cndmask_b32_e64 v13, v9, 0, s[4:5]
-; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
+; SDAG-NEXT: s_xor_b64 s[8:9], s[4:5], -1
; SDAG-NEXT: v_cndmask_b32_e64 v12, v8, 0, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v7, v11, 0, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v6, v10, 0, s[4:5]
-; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc
-; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
; SDAG-NEXT: s_cbranch_execz .LBB1_12
; SDAG-NEXT: ; %bb.7: ; %udiv-bb1
; SDAG-NEXT: v_add_i32_e32 v22, vcc, 1, v2
@@ -1165,7 +1155,7 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b64 s[8:9], 0
; GISEL-NEXT: v_ashrrev_i32_e32 v18, 31, v3
-; GISEL-NEXT: v_mov_b32_e32 v14, 0x5e
+; GISEL-NEXT: v_mov_b32_e32 v12, 0x5e
; GISEL-NEXT: v_mov_b32_e32 v8, 0x7f
; GISEL-NEXT: v_mov_b32_e32 v9, 0
; GISEL-NEXT: v_xor_b32_e32 v0, v18, v0
@@ -1174,49 +1164,41 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; GISEL-NEXT: v_xor_b32_e32 v3, v18, v3
; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v0, v18
; GISEL-NEXT: v_subb_u32_e32 v11, vcc, v1, v18, vcc
-; GISEL-NEXT: v_subb_u32_e32 v12, vcc, v2, v18, vcc
-; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v3, v18, vcc
+; GISEL-NEXT: v_subb_u32_e32 v14, vcc, v2, v18, vcc
+; GISEL-NEXT: v_subb_u32_e32 v15, vcc, v3, v18, vcc
; GISEL-NEXT: v_ffbh_u32_e32 v2, v11
; GISEL-NEXT: v_ffbh_u32_e32 v3, v10
-; GISEL-NEXT: v_or_b32_e32 v0, v10, v12
-; GISEL-NEXT: v_or_b32_e32 v1, v11, v13
+; GISEL-NEXT: v_or_b32_e32 v0, v10, v14
+; GISEL-NEXT: v_or_b32_e32 v1, v11, v15
; GISEL-NEXT: v_add_i32_e32 v3, vcc, 32, v3
-; GISEL-NEXT: v_ffbh_u32_e32 v15, v13
-; GISEL-NEXT: v_ffbh_u32_e32 v16, v12
+; GISEL-NEXT: v_ffbh_u32_e32 v13, v15
+; GISEL-NEXT: v_ffbh_u32_e32 v16, v14
; GISEL-NEXT: v_min_u32_e32 v2, v2, v3
; GISEL-NEXT: v_add_i32_e32 v3, vcc, 32, v16
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v0, vcc, 64, v2
-; GISEL-NEXT: v_min_u32_e32 v1, v15, v3
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13]
+; GISEL-NEXT: v_min_u32_e32 v1, v13, v3
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15]
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v14, v0
+; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v0
; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
-; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v2
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
-; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v8, v8, v0
-; GISEL-NEXT: v_or_b32_e32 v9, v3, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GISEL-NEXT: v_cndmask_b32_e32 v14, v15, v14, vcc
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v9, v16, v14
-; GISEL-NEXT: v_and_b32_e32 v14, 1, v9
-; GISEL-NEXT: v_or_b32_e32 v8, v9, v8
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
+; GISEL-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
+; GISEL-NEXT: v_or_b32_e32 v8, v16, v8
+; GISEL-NEXT: v_and_b32_e32 v8, 1, v8
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
; GISEL-NEXT: v_cndmask_b32_e64 v16, v10, 0, vcc
-; GISEL-NEXT: v_and_b32_e32 v14, 1, v8
-; GISEL-NEXT: v_cndmask_b32_e64 v17, v11, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v8, v12, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v9, v13, 0, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
+; GISEL-NEXT: v_cndmask_b32_e64 v17, v11, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v8, v14, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v9, v15, 0, vcc
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB1_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
@@ -1226,23 +1208,23 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; GISEL-NEXT: v_not_b32_e32 v2, 63
; GISEL-NEXT: v_addc_u32_e64 v21, vcc, 0, v0, s[4:5]
; GISEL-NEXT: v_addc_u32_e32 v22, vcc, 0, v1, vcc
-; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v23, v2
+; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v23, v2
; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 64, v23
; GISEL-NEXT: v_lshl_b64 v[0:1], v[10:11], v23
-; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], v23
+; GISEL-NEXT: v_lshl_b64 v[2:3], v[14:15], v23
; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
; GISEL-NEXT: v_lshr_b64 v[8:9], v[10:11], v8
-; GISEL-NEXT: v_lshl_b64 v[16:17], v[10:11], v14
+; GISEL-NEXT: v_lshl_b64 v[16:17], v[10:11], v12
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23
-; GISEL-NEXT: v_cndmask_b32_e32 v14, 0, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v15, 0, v1, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v1, vcc
; GISEL-NEXT: v_or_b32_e32 v0, v8, v2
; GISEL-NEXT: v_or_b32_e32 v1, v9, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v23
-; GISEL-NEXT: v_cndmask_b32_e32 v8, v0, v12, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v1, v13, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v8, v0, v14, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v9, v1, v15, vcc
; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
; GISEL-NEXT: v_mov_b32_e32 v0, s8
; GISEL-NEXT: v_mov_b32_e32 v1, s9
@@ -1254,22 +1236,22 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4
; GISEL-NEXT: v_add_i32_e32 v16, vcc, 0xffffffc0, v19
; GISEL-NEXT: v_sub_i32_e32 v17, vcc, 64, v19
-; GISEL-NEXT: v_lshr_b64 v[0:1], v[12:13], v19
+; GISEL-NEXT: v_lshr_b64 v[0:1], v[14:15], v19
; GISEL-NEXT: v_lshr_b64 v[2:3], v[10:11], v19
; GISEL-NEXT: s_mov_b64 s[8:9], 0
-; GISEL-NEXT: v_lshl_b64 v[23:24], v[12:13], v17
-; GISEL-NEXT: v_lshr_b64 v[12:13], v[12:13], v16
+; GISEL-NEXT: v_lshl_b64 v[23:24], v[14:15], v17
+; GISEL-NEXT: v_lshr_b64 v[14:15], v[14:15], v16
; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
; GISEL-NEXT: v_cndmask_b32_e32 v16, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v17, 0, v1, vcc
; GISEL-NEXT: v_or_b32_e32 v0, v2, v23
; GISEL-NEXT: v_or_b32_e32 v1, v3, v24
-; GISEL-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v14, v0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v15, v1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v19
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v13, v1, v11, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v14, v0, v10, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v15, v1, v11, vcc
; GISEL-NEXT: v_mov_b32_e32 v11, 0
; GISEL-NEXT: v_mov_b32_e32 v0, s8
; GISEL-NEXT: v_mov_b32_e32 v1, s9
@@ -1277,25 +1259,25 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; GISEL-NEXT: v_mov_b32_e32 v3, s11
; GISEL-NEXT: .LBB1_3: ; %udiv-do-while3
; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT: v_lshl_b64 v[2:3], v[14:15], 1
-; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v15
-; GISEL-NEXT: v_lshl_b64 v[23:24], v[12:13], 1
+; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], 1
+; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v13
+; GISEL-NEXT: v_lshl_b64 v[23:24], v[14:15], 1
; GISEL-NEXT: v_lshl_b64 v[16:17], v[16:17], 1
-; GISEL-NEXT: v_lshrrev_b32_e32 v12, 31, v13
-; GISEL-NEXT: v_lshrrev_b32_e32 v13, 31, v9
+; GISEL-NEXT: v_lshrrev_b32_e32 v14, 31, v15
+; GISEL-NEXT: v_lshrrev_b32_e32 v15, 31, v9
; GISEL-NEXT: v_lshl_b64 v[8:9], v[8:9], 1
; GISEL-NEXT: v_add_i32_e32 v19, vcc, -1, v19
; GISEL-NEXT: v_addc_u32_e32 v20, vcc, -1, v20, vcc
-; GISEL-NEXT: v_or_b32_e32 v14, v0, v2
-; GISEL-NEXT: v_or_b32_e32 v15, v1, v3
-; GISEL-NEXT: v_or_b32_e32 v2, v16, v12
-; GISEL-NEXT: v_or_b32_e32 v0, v23, v13
+; GISEL-NEXT: v_or_b32_e32 v12, v0, v2
+; GISEL-NEXT: v_or_b32_e32 v13, v1, v3
+; GISEL-NEXT: v_or_b32_e32 v2, v16, v14
+; GISEL-NEXT: v_or_b32_e32 v0, v23, v15
; GISEL-NEXT: v_or_b32_e32 v8, v8, v10
; GISEL-NEXT: v_addc_u32_e32 v21, vcc, -1, v21, vcc
; GISEL-NEXT: v_addc_u32_e32 v22, vcc, -1, v22, vcc
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, 1, v24
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, 0, v2, vcc
-; GISEL-NEXT: v_subrev_i32_e64 v12, s[4:5], 0, v0
+; GISEL-NEXT: v_subrev_i32_e64 v14, s[4:5], 0, v0
; GISEL-NEXT: v_or_b32_e32 v0, v19, v21
; GISEL-NEXT: v_or_b32_e32 v1, v20, v22
; GISEL-NEXT: v_subb_u32_e32 v3, vcc, 0, v17, vcc
@@ -1305,7 +1287,7 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; GISEL-NEXT: v_and_b32_e32 v3, 2, v0
; GISEL-NEXT: v_mov_b32_e32 v0, v10
; GISEL-NEXT: v_mov_b32_e32 v1, v11
-; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], v24, v3
+; GISEL-NEXT: v_sub_i32_e64 v15, s[4:5], v24, v3
; GISEL-NEXT: v_subbrev_u32_e64 v16, s[4:5], 0, v2, s[4:5]
; GISEL-NEXT: s_or_b64 s[8:9], vcc, s[8:9]
; GISEL-NEXT: v_subbrev_u32_e64 v17, vcc, 0, v17, s[4:5]
@@ -1315,9 +1297,9 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
; GISEL-NEXT: .LBB1_5: ; %Flow14
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT: v_lshl_b64 v[2:3], v[14:15], 1
+; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], 1
; GISEL-NEXT: v_lshl_b64 v[8:9], v[8:9], 1
-; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v15
+; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v13
; GISEL-NEXT: v_or_b32_e32 v8, v8, v10
; GISEL-NEXT: v_or_b32_e32 v16, v0, v2
; GISEL-NEXT: v_or_b32_e32 v17, v1, v3
@@ -1325,84 +1307,76 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: s_mov_b64 s[8:9], 0
; GISEL-NEXT: v_ashrrev_i32_e32 v19, 31, v7
-; GISEL-NEXT: v_mov_b32_e32 v2, 0x5e
-; GISEL-NEXT: v_mov_b32_e32 v12, 0x7f
-; GISEL-NEXT: v_mov_b32_e32 v13, 0
+; GISEL-NEXT: v_mov_b32_e32 v10, 0x5e
+; GISEL-NEXT: v_mov_b32_e32 v2, 0x7f
+; GISEL-NEXT: v_mov_b32_e32 v3, 0
; GISEL-NEXT: v_xor_b32_e32 v0, v19, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v19, v5
-; GISEL-NEXT: v_xor_b32_e32 v3, v19, v6
-; GISEL-NEXT: v_xor_b32_e32 v6, v19, v7
-; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v19
-; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v1, v19, vcc
-; GISEL-NEXT: v_subb_u32_e32 v10, vcc, v3, v19, vcc
-; GISEL-NEXT: v_subb_u32_e32 v11, vcc, v6, v19, vcc
-; GISEL-NEXT: v_ffbh_u32_e32 v3, v5
-; GISEL-NEXT: v_ffbh_u32_e32 v6, v4
-; GISEL-NEXT: v_or_b32_e32 v0, v4, v10
-; GISEL-NEXT: v_or_b32_e32 v1, v5, v11
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, 32, v6
-; GISEL-NEXT: v_ffbh_u32_e32 v7, v11
-; GISEL-NEXT: v_ffbh_u32_e32 v14, v10
-; GISEL-NEXT: v_min_u32_e32 v3, v3, v6
-; GISEL-NEXT: v_add_i32_e32 v6, vcc, 32, v14
+; GISEL-NEXT: v_xor_b32_e32 v4, v19, v6
+; GISEL-NEXT: v_xor_b32_e32 v5, v19, v7
+; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v19
+; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v1, v19, vcc
+; GISEL-NEXT: v_subb_u32_e32 v12, vcc, v4, v19, vcc
+; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v5, v19, vcc
+; GISEL-NEXT: v_ffbh_u32_e32 v4, v7
+; GISEL-NEXT: v_ffbh_u32_e32 v5, v6
+; GISEL-NEXT: v_or_b32_e32 v0, v6, v12
+; GISEL-NEXT: v_or_b32_e32 v1, v7, v13
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, 32, v5
+; GISEL-NEXT: v_ffbh_u32_e32 v11, v13
+; GISEL-NEXT: v_ffbh_u32_e32 v14, v12
+; GISEL-NEXT: v_min_u32_e32 v4, v4, v5
+; GISEL-NEXT: v_add_i32_e32 v5, vcc, 32, v14
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
-; GISEL-NEXT: v_add_i32_e32 v0, vcc, 64, v3
-; GISEL-NEXT: v_min_u32_e32 v1, v7, v6
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, 64, v4
+; GISEL-NEXT: v_min_u32_e32 v1, v11, v5
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13]
; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v0
-; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
+; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v10, v0
+; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, vcc
; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
-; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[12:13]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v6, 0x7f, v2
+; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[2:3]
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v6, v6, v0
-; GISEL-NEXT: v_or_b32_e32 v7, v3, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v12, vcc
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
-; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v7, v14, v12
-; GISEL-NEXT: v_and_b32_e32 v12, 1, v7
-; GISEL-NEXT: v_or_b32_e32 v6, v7, v6
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v4, 0, vcc
-; GISEL-NEXT: v_and_b32_e32 v14, 1, v6
-; GISEL-NEXT: v_cndmask_b32_e64 v13, v5, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v6, v10, 0, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v7, v11, 0, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
+; GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; GISEL-NEXT: v_or_b32_e32 v2, v14, v2
+; GISEL-NEXT: v_and_b32_e32 v2, 1, v2
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL-NEXT: v_cndmask_b32_e64 v10, v6, 0, vcc
; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
+; GISEL-NEXT: v_cndmask_b32_e64 v11, v7, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v2, v12, 0, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v3, v13, 0, vcc
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB1_12
; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
-; GISEL-NEXT: v_add_i32_e32 v20, vcc, 1, v2
-; GISEL-NEXT: v_addc_u32_e64 v21, s[4:5], 0, v3, vcc
-; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v2
+; GISEL-NEXT: v_add_i32_e32 v20, vcc, 1, v4
+; GISEL-NEXT: v_addc_u32_e64 v21, s[4:5], 0, v5, vcc
+; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v4
; GISEL-NEXT: v_not_b32_e32 v2, 63
; GISEL-NEXT: v_addc_u32_e64 v22, vcc, 0, v0, s[4:5]
; GISEL-NEXT: v_addc_u32_e32 v23, vcc, 0, v1, vcc
-; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v24, v2
-; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], 64, v24
-; GISEL-NEXT: v_lshl_b64 v[0:1], v[4:5], v24
-; GISEL-NEXT: v_lshl_b64 v[2:3], v[10:11], v24
+; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v24, v2
+; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], 64, v24
+; GISEL-NEXT: v_lshl_b64 v[0:1], v[6:7], v24
+; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], v24
; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
-; GISEL-NEXT: v_lshr_b64 v[6:7], v[4:5], v6
-; GISEL-NEXT: v_lshl_b64 v[14:15], v[4:5], v12
+; GISEL-NEXT: v_lshr_b64 v[4:5], v[6:7], v4
+; GISEL-NEXT: v_lshl_b64 v[14:15], v[6:7], v10
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24
-; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v1, vcc
-; GISEL-NEXT: v_or_b32_e32 v0, v6, v2
-; GISEL-NEXT: v_or_b32_e32 v1, v7, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v1, vcc
+; GISEL-NEXT: v_or_b32_e32 v0, v4, v2
+; GISEL-NEXT: v_or_b32_e32 v1, v5, v3
; GISEL-NEXT: v_cndmask_b32_e32 v0, v14, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v1, v15, v1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24
-; GISEL-NEXT: v_cndmask_b32_e32 v6, v0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v7, v1, v11, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v4, v0, v12, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v5, v1, v13, vcc
; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
; GISEL-NEXT: v_mov_b32_e32 v0, s8
; GISEL-NEXT: v_mov_b32_e32 v1, s9
@@ -1414,59 +1388,59 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
; GISEL-NEXT: v_add_i32_e32 v24, vcc, 0xffffffc0, v20
; GISEL-NEXT: v_sub_i32_e32 v14, vcc, 64, v20
-; GISEL-NEXT: v_lshr_b64 v[0:1], v[10:11], v20
-; GISEL-NEXT: v_lshr_b64 v[2:3], v[4:5], v20
+; GISEL-NEXT: v_lshr_b64 v[0:1], v[12:13], v20
+; GISEL-NEXT: v_lshr_b64 v[2:3], v[6:7], v20
; GISEL-NEXT: s_mov_b64 s[8:9], 0
-; GISEL-NEXT: v_lshl_b64 v[14:15], v[10:11], v14
-; GISEL-NEXT: v_lshr_b64 v[10:11], v[10:11], v24
+; GISEL-NEXT: v_lshl_b64 v[14:15], v[12:13], v14
+; GISEL-NEXT: v_lshr_b64 v[12:13], v[12:13], v24
; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
; GISEL-NEXT: v_or_b32_e32 v2, v2, v14
; GISEL-NEXT: v_or_b32_e32 v3, v3, v15
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v20
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v14, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v15, 0, v1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v20
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v3, v5, vcc
-; GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v2, v6, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v13, v3, v7, vcc
+; GISEL-NEXT: v_mov_b32_e32 v7, 0
; GISEL-NEXT: v_mov_b32_e32 v0, s8
; GISEL-NEXT: v_mov_b32_e32 v1, s9
; GISEL-NEXT: v_mov_b32_e32 v2, s10
; GISEL-NEXT: v_mov_b32_e32 v3, s11
; GISEL-NEXT: .LBB1_9: ; %udiv-do-while
; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT: v_lshl_b64 v[24:25], v[10:11], 1
-; GISEL-NEXT: v_lshl_b64 v[14:15], v[14:15], 1
-; GISEL-NEXT: v_lshrrev_b32_e32 v5, 31, v11
-; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v7
; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], 1
-; GISEL-NEXT: v_lshl_b64 v[6:7], v[6:7], 1
-; GISEL-NEXT: v_lshrrev_b32_e32 v11, 31, v13
+; GISEL-NEXT: v_lshl_b64 v[14:15], v[14:15], 1
+; GISEL-NEXT: v_lshrrev_b32_e32 v6, 31, v13
+; GISEL-NEXT: v_lshrrev_b32_e32 v24, 31, v5
+; GISEL-NEXT: v_lshl_b64 v[12:13], v[10:11], 1
+; GISEL-NEXT: v_lshl_b64 v[4:5], v[4:5], 1
+; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v11
; GISEL-NEXT: v_add_i32_e32 v20, vcc, -1, v20
; GISEL-NEXT: v_addc_u32_e32 v21, vcc, -1, v21, vcc
-; GISEL-NEXT: v_or_b32_e32 v5, v14, v5
-; GISEL-NEXT: v_or_b32_e32 v10, v24, v10
-; GISEL-NEXT: v_or_b32_e32 v6, v6, v11
-; GISEL-NEXT: v_or_b32_e32 v12, v0, v2
-; GISEL-NEXT: v_or_b32_e32 v13, v1, v3
+; GISEL-NEXT: v_or_b32_e32 v14, v14, v6
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v24
+; GISEL-NEXT: v_or_b32_e32 v4, v4, v10
+; GISEL-NEXT: v_or_b32_e32 v10, v0, v12
+; GISEL-NEXT: v_or_b32_e32 v11, v1, v13
; GISEL-NEXT: v_addc_u32_e32 v22, vcc, -1, v22, vcc
; GISEL-NEXT: v_addc_u32_e32 v23, vcc, -1, v23, vcc
-; GISEL-NEXT: v_sub_i32_e32 v0, vcc, 1, v25
-; GISEL-NEXT: v_subb_u32_e32 v0, vcc, 0, v5, vcc
-; GISEL-NEXT: v_subrev_i32_e64 v10, s[4:5], 0, v10
+; GISEL-NEXT: v_sub_i32_e32 v0, vcc, 1, v3
+; GISEL-NEXT: v_subb_u32_e32 v0, vcc, 0, v14, vcc
+; GISEL-NEXT: v_subrev_i32_e64 v12, s[4:5], 0, v2
; GISEL-NEXT: v_or_b32_e32 v0, v20, v22
; GISEL-NEXT: v_or_b32_e32 v1, v21, v23
; GISEL-NEXT: v_subb_u32_e32 v2, vcc, 0, v15, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v2
-; GISEL-NEXT: v_and_b32_e32 v3, 1, v0
+; GISEL-NEXT: v_and_b32_e32 v6, 1, v0
; GISEL-NEXT: v_and_b32_e32 v2, 2, v0
-; GISEL-NEXT: v_mov_b32_e32 v0, v3
-; GISEL-NEXT: v_mov_b32_e32 v1, v4
-; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], v25, v2
-; GISEL-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v5, s[4:5]
+; GISEL-NEXT: v_mov_b32_e32 v0, v6
+; GISEL-NEXT: v_mov_b32_e32 v1, v7
+; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], v3, v2
+; GISEL-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v14, s[4:5]
; GISEL-NEXT: s_or_b64 s[8:9], vcc, s[8:9]
; GISEL-NEXT: v_subbrev_u32_e64 v15, vcc, 0, v15, s[4:5]
; GISEL-NEXT: s_andn2_b64 exec, exec, s[8:9]
@@ -1475,30 +1449,30 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
; GISEL-NEXT: .LBB1_11: ; %Flow11
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], 1
-; GISEL-NEXT: v_lshl_b64 v[6:7], v[6:7], 1
-; GISEL-NEXT: v_lshrrev_b32_e32 v4, 31, v13
-; GISEL-NEXT: v_or_b32_e32 v6, v6, v4
-; GISEL-NEXT: v_or_b32_e32 v12, v0, v2
-; GISEL-NEXT: v_or_b32_e32 v13, v1, v3
+; GISEL-NEXT: v_lshl_b64 v[6:7], v[10:11], 1
+; GISEL-NEXT: v_lshl_b64 v[2:3], v[4:5], 1
+; GISEL-NEXT: v_lshrrev_b32_e32 v4, 31, v11
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v4
+; GISEL-NEXT: v_or_b32_e32 v10, v0, v6
+; GISEL-NEXT: v_or_b32_e32 v11, v1, v7
; GISEL-NEXT: .LBB1_12: ; %Flow12
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: v_xor_b32_e32 v0, v16, v18
; GISEL-NEXT: v_xor_b32_e32 v1, v17, v18
-; GISEL-NEXT: v_xor_b32_e32 v2, v8, v18
-; GISEL-NEXT: v_xor_b32_e32 v3, v9, v18
-; GISEL-NEXT: v_xor_b32_e32 v4, v12, v19
-; GISEL-NEXT: v_xor_b32_e32 v5, v13, v19
-; GISEL-NEXT: v_xor_b32_e32 v6, v6, v19
-; GISEL-NEXT: v_xor_b32_e32 v7, v7, v19
+; GISEL-NEXT: v_xor_b32_e32 v6, v8, v18
+; GISEL-NEXT: v_xor_b32_e32 v7, v9, v18
+; GISEL-NEXT: v_xor_b32_e32 v4, v10, v19
+; GISEL-NEXT: v_xor_b32_e32 v5, v11, v19
+; GISEL-NEXT: v_xor_b32_e32 v8, v2, v19
+; GISEL-NEXT: v_xor_b32_e32 v9, v3, v19
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v18
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v18, vcc
; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v19
; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v5, v19, s[4:5]
-; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v18, vcc
-; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v18, vcc
-; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v6, v19, s[4:5]
-; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v7, v19, vcc
+; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v6, v18, vcc
+; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v7, v18, vcc
+; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v8, v19, s[4:5]
+; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v9, v19, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
%shl = sdiv <2 x i128> %lhs, <i128 8589934592, i128 8589934592>
ret <2 x i128> %shl
@@ -2248,28 +2222,23 @@ define <2 x i128> @v_udiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
; SDAG-NEXT: v_sub_i32_e32 v12, vcc, 0x5e, v2
; SDAG-NEXT: v_subb_u32_e32 v13, vcc, 0, v10, vcc
-; SDAG-NEXT: v_xor_b32_e32 v2, 0x7f, v12
; SDAG-NEXT: v_subb_u32_e32 v14, vcc, 0, v15, vcc
; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[12:13]
-; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[6:7]
; SDAG-NEXT: v_subb_u32_e32 v15, vcc, 0, v15, vcc
-; SDAG-NEXT: v_or_b32_e32 v2, v2, v14
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15]
-; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v3, v13, v15
+; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15]
-; SDAG-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
-; SDAG-NEXT: v_and_b32_e32 v2, 1, v10
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v2
-; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; SDAG-NEXT: v_and_b32_e32 v2, 1, v2
+; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
+; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; SDAG-NEXT: v_cndmask_b32_e64 v3, v9, 0, s[4:5]
-; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
+; SDAG-NEXT: s_xor_b64 s[8:9], s[4:5], -1
; SDAG-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v10, v1, 0, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v11, v0, 0, s[4:5]
-; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc
-; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
; SDAG-NEXT: s_cbranch_execz .LBB3_6
; SDAG-NEXT: ; %bb.1: ; %udiv-bb15
; SDAG-NEXT: v_add_i32_e32 v18, vcc, 1, v12
@@ -2395,28 +2364,23 @@ define <2 x i128> @v_udiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-NEXT: v_sub_i32_e32 v0, vcc, 0x5e, v0
; SDAG-NEXT: v_subb_u32_e32 v1, vcc, 0, v8, vcc
-; SDAG-NEXT: v_xor_b32_e32 v8, 0x7f, v0
; SDAG-NEXT: v_subb_u32_e32 v14, vcc, 0, v15, vcc
; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1]
-; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[6:7]
; SDAG-NEXT: v_subb_u32_e32 v15, vcc, 0, v15, vcc
-; SDAG-NEXT: v_or_b32_e32 v8, v8, v14
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15]
-; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; SDAG-NEXT: v_or_b32_e32 v9, v1, v15
+; SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15]
-; SDAG-NEXT: v_cndmask_b32_e32 v12, v13, v12, vcc
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
-; SDAG-NEXT: v_and_b32_e32 v8, 1, v12
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v8
-; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
+; SDAG-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
+; SDAG-NEXT: v_and_b32_e32 v8, 1, v8
+; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
+; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; SDAG-NEXT: v_cndmask_b32_e64 v9, v7, 0, s[4:5]
-; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
+; SDAG-NEXT: s_xor_b64 s[8:9], s[4:5], -1
; SDAG-NEXT: v_cndmask_b32_e64 v8, v6, 0, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v12, v5, 0, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v13, v4, 0, s[4:5]
-; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc
-; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
; SDAG-NEXT: s_cbranch_execz .LBB3_12
; SDAG-NEXT: ; %bb.7: ; %udiv-bb1
; SDAG-NEXT: v_add_i32_e32 v18, vcc, 1, v0
@@ -2548,38 +2512,30 @@ define <2 x i128> @v_udiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; GISEL-NEXT: v_add_i32_e32 v13, vcc, 32, v13
; GISEL-NEXT: v_add_i32_e32 v15, vcc, 32, v15
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
-; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT: v_min_u32_e32 v2, v12, v13
-; GISEL-NEXT: v_min_u32_e32 v3, v14, v15
-; GISEL-NEXT: v_add_i32_e32 v2, vcc, 64, v2
+; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GISEL-NEXT: v_min_u32_e32 v3, v12, v13
+; GISEL-NEXT: v_min_u32_e32 v12, v14, v15
+; GISEL-NEXT: v_add_i32_e32 v3, vcc, 64, v3
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
-; GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
-; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v16, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc
+; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v16, v3
; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], 0, 0, vcc
; GISEL-NEXT: v_subb_u32_e64 v12, s[4:5], 0, 0, s[4:5]
; GISEL-NEXT: v_subb_u32_e64 v13, s[4:5], 0, 0, s[4:5]
; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[14:15], v[10:11]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13]
-; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v2, v2, v12
-; GISEL-NEXT: v_or_b32_e32 v3, v15, v13
+; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13]
-; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
-; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v3, v17, v10
-; GISEL-NEXT: v_and_b32_e32 v10, 1, v3
-; GISEL-NEXT: v_or_b32_e32 v2, v3, v2
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
+; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
+; GISEL-NEXT: v_or_b32_e32 v2, v2, v3
+; GISEL-NEXT: v_and_b32_e32 v2, 1, v2
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; GISEL-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc
-; GISEL-NEXT: v_and_b32_e32 v16, 1, v2
+; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
; GISEL-NEXT: v_cndmask_b32_e64 v11, v1, 0, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v3, v9, 0, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
-; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB3_6
; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
@@ -2710,27 +2666,19 @@ define <2 x i128> @v_udiv_v2i128_v_pow2k(<2 x i128> %lhs) {
; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[14:15], v[8:9]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v14
+; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
-; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v8, v8, v0
-; GISEL-NEXT: v_or_b32_e32 v9, v15, v1
+; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v12, vcc
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
-; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v9, v17, v12
-; GISEL-NEXT: v_and_b32_e32 v12, 1, v9
-; GISEL-NEXT: v_or_b32_e32 v8, v9, v8
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
+; GISEL-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
+; GISEL-NEXT: v_or_b32_e32 v8, v17, v8
+; GISEL-NEXT: v_and_b32_e32 v8, 1, v8
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
; GISEL-NEXT: v_cndmask_b32_e64 v12, v4, 0, vcc
-; GISEL-NEXT: v_and_b32_e32 v16, 1, v8
+; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
; GISEL-NEXT: v_cndmask_b32_e64 v13, v5, 0, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v8, v6, 0, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v9, v7, 0, vcc
-; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
-; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GISEL-NEXT: s_cbranch_execz .LBB3_12
; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
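The pattern repeated across these AMDGPU division tests is the removal of the bit-width special case: the expansion used to test both `count > 127` and `count == 127` (or 63 in the i64 tests below), but with a constant operand the shift count is derived from a ctlz whose range is known, and the new add/sub range propagation proves the equality can never hold. A rough model of the i128 case, outside the patch and with illustrative names:

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// v_sub_i32 ..., 0x5e, v2 computes count = 94 - clz, with clz in [0, 128],
// so count lies in [-34, 94] and the old eq-127 compare is statically false.
bool countNeverEquals127() {
  ConstantRange Clz(APInt(32, 0), APInt(32, 129));
  ConstantRange Count = ConstantRange(APInt(32, 94)).sub(Clz);
  return !Count.contains(APInt(32, 127)); // true
}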
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index fdb20f372ab8d..d3a027f99947b 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -1275,12 +1275,11 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_addc_u32 s11, 0, -1
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[2:3], 0
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[10:11], 63
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[10:11], 63
-; GCN-IR-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13]
-; GCN-IR-NEXT: s_and_b64 s[8:9], s[12:13], exec
+; GCN-IR-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
+; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[8:9]
+; GCN-IR-NEXT: s_and_b64 s[8:9], s[8:9], exec
+; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
; GCN-IR-NEXT: s_cselect_b32 s8, 0, 24
-; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15]
-; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[12:13]
; GCN-IR-NEXT: s_mov_b32 s9, 0
; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
@@ -1462,13 +1461,11 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
+; GCN-IR-NEXT: v_mov_b32_e32 v11, v10
; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc
+; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
; GCN-IR-NEXT: v_cndmask_b32_e64 v4, 24, 0, s[4:5]
; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GCN-IR-NEXT: v_mov_b32_e32 v11, v10
-; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
-; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-IR-NEXT: s_cbranch_execz .LBB11_6
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
@@ -1653,14 +1650,12 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
; GCN-IR-NEXT: v_mov_b32_e32 v4, 0x8000
; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc
-; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5]
-; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1
; GCN-IR-NEXT: v_mov_b32_e32 v11, v10
; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
-; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
+; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5]
+; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1
; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-IR-NEXT: s_cbranch_execz .LBB12_6
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
@@ -1755,12 +1750,10 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) {
; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[0:1]
; GCN-IR-NEXT: v_mov_b32_e32 v9, v8
; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[0:1]
-; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1
; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v5, 0, s[4:5]
+; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1
; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5]
-; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc
-; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
; GCN-IR-NEXT: s_cbranch_execz .LBB13_6
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v0
diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll
index 02d2e6c1473ab..3bee2fa7da49a 100644
--- a/llvm/test/CodeGen/AMDGPU/srem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -1414,73 +1414,72 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-LABEL: s_test_srem_k_num_i64:
; GCN-IR: ; %bb.0: ; %_udiv-special-cases
; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-IR-NEXT: s_mov_b64 s[6:7], 0
; GCN-IR-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-IR-NEXT: s_ashr_i32 s8, s3, 31
-; GCN-IR-NEXT: s_mov_b32 s9, s8
-; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[8:9]
-; GCN-IR-NEXT: s_sub_u32 s4, s2, s8
-; GCN-IR-NEXT: s_subb_u32 s5, s3, s8
+; GCN-IR-NEXT: s_ashr_i32 s6, s3, 31
+; GCN-IR-NEXT: s_mov_b32 s7, s6
+; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7]
+; GCN-IR-NEXT: s_sub_u32 s4, s2, s6
+; GCN-IR-NEXT: s_subb_u32 s5, s3, s6
; GCN-IR-NEXT: s_flbit_i32_b64 s14, s[4:5]
-; GCN-IR-NEXT: s_add_u32 s2, s14, 0xffffffc5
-; GCN-IR-NEXT: s_addc_u32 s3, 0, -1
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[4:5], 0
-; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[2:3], 63
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 63
-; GCN-IR-NEXT: s_or_b64 s[10:11], s[8:9], s[10:11]
-; GCN-IR-NEXT: s_and_b64 s[8:9], s[10:11], exec
-; GCN-IR-NEXT: s_cselect_b32 s8, 0, 24
-; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
-; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11]
-; GCN-IR-NEXT: s_mov_b32 s9, 0
+; GCN-IR-NEXT: s_add_u32 s8, s14, 0xffffffc5
+; GCN-IR-NEXT: s_addc_u32 s9, 0, -1
+; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[4:5], 0
+; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[8:9], 63
+; GCN-IR-NEXT: s_mov_b64 s[2:3], 0
+; GCN-IR-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
+; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7]
+; GCN-IR-NEXT: s_and_b64 s[6:7], s[6:7], exec
+; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
+; GCN-IR-NEXT: s_cselect_b32 s6, 0, 24
+; GCN-IR-NEXT: s_mov_b32 s7, 0
; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
-; GCN-IR-NEXT: s_add_u32 s8, s2, 1
-; GCN-IR-NEXT: s_addc_u32 s3, s3, 0
-; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0
-; GCN-IR-NEXT: s_sub_i32 s2, 63, s2
-; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11]
-; GCN-IR-NEXT: s_lshl_b64 s[2:3], 24, s2
+; GCN-IR-NEXT: s_add_u32 s10, s8, 1
+; GCN-IR-NEXT: s_addc_u32 s6, s9, 0
+; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0
+; GCN-IR-NEXT: s_sub_i32 s8, 63, s8
+; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7]
+; GCN-IR-NEXT: s_lshl_b64 s[6:7], 24, s8
; GCN-IR-NEXT: s_cbranch_vccz .LBB10_4
; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader
-; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s8
+; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s10
; GCN-IR-NEXT: s_add_u32 s12, s4, -1
; GCN-IR-NEXT: s_addc_u32 s13, s5, -1
; GCN-IR-NEXT: s_sub_u32 s14, 58, s14
; GCN-IR-NEXT: s_subb_u32 s15, 0, 0
; GCN-IR-NEXT: s_mov_b64 s[8:9], 0
-; GCN-IR-NEXT: s_mov_b32 s7, 0
+; GCN-IR-NEXT: s_mov_b32 s3, 0
; GCN-IR-NEXT: .LBB10_3: ; %udiv-do-while
; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[10:11], 1
-; GCN-IR-NEXT: s_lshr_b32 s6, s3, 31
-; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
-; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[6:7]
-; GCN-IR-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
-; GCN-IR-NEXT: s_sub_u32 s6, s12, s10
-; GCN-IR-NEXT: s_subb_u32 s6, s13, s11
-; GCN-IR-NEXT: s_ashr_i32 s8, s6, 31
+; GCN-IR-NEXT: s_lshr_b32 s2, s7, 31
+; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1
+; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[2:3]
+; GCN-IR-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
+; GCN-IR-NEXT: s_sub_u32 s2, s12, s10
+; GCN-IR-NEXT: s_subb_u32 s2, s13, s11
+; GCN-IR-NEXT: s_ashr_i32 s8, s2, 31
; GCN-IR-NEXT: s_mov_b32 s9, s8
-; GCN-IR-NEXT: s_and_b32 s6, s8, 1
+; GCN-IR-NEXT: s_and_b32 s2, s8, 1
; GCN-IR-NEXT: s_and_b64 s[16:17], s[8:9], s[4:5]
; GCN-IR-NEXT: s_sub_u32 s10, s10, s16
; GCN-IR-NEXT: s_subb_u32 s11, s11, s17
; GCN-IR-NEXT: s_add_u32 s14, s14, 1
; GCN-IR-NEXT: s_addc_u32 s15, s15, 0
; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0
-; GCN-IR-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GCN-IR-NEXT: s_mov_b64 s[8:9], s[2:3]
; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17]
; GCN-IR-NEXT: s_cbranch_vccz .LBB10_3
; GCN-IR-NEXT: .LBB10_4: ; %Flow6
-; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
-; GCN-IR-NEXT: s_or_b64 s[8:9], s[6:7], s[2:3]
+; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1
+; GCN-IR-NEXT: s_or_b64 s[6:7], s[2:3], s[6:7]
; GCN-IR-NEXT: .LBB10_5: ; %udiv-end
-; GCN-IR-NEXT: v_mov_b32_e32 v0, s8
+; GCN-IR-NEXT: v_mov_b32_e32 v0, s6
; GCN-IR-NEXT: v_mul_hi_u32 v0, s4, v0
-; GCN-IR-NEXT: s_mul_i32 s6, s4, s9
-; GCN-IR-NEXT: s_mul_i32 s5, s5, s8
-; GCN-IR-NEXT: s_mul_i32 s4, s4, s8
-; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s6, v0
+; GCN-IR-NEXT: s_mul_i32 s7, s4, s7
+; GCN-IR-NEXT: s_mul_i32 s5, s5, s6
+; GCN-IR-NEXT: s_mul_i32 s4, s4, s6
+; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s7, v0
; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, s5, v0
; GCN-IR-NEXT: v_sub_i32_e64 v0, vcc, 24, s4
; GCN-IR-NEXT: s_mov_b32 s3, 0xf000
@@ -1612,12 +1611,10 @@ define i64 @v_test_srem_k_num_i64(i64 %x) {
; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
+; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GCN-IR-NEXT: v_cndmask_b32_e64 v4, 24, 0, s[4:5]
; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
-; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-IR-NEXT: s_cbranch_execz .LBB11_6
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
@@ -1801,13 +1798,11 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) {
; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
; GCN-IR-NEXT: v_mov_b32_e32 v4, 0x8000
; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc
+; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5]
; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
-; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-IR-NEXT: s_cbranch_execz .LBB12_6
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
@@ -1908,12 +1903,10 @@ define i64 @v_test_srem_pow2_k_den_i64(i64 %x) {
; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[2:3]
; GCN-IR-NEXT: v_mov_b32_e32 v11, v10
; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1
; GCN-IR-NEXT: v_cndmask_b32_e64 v5, v1, 0, s[4:5]
+; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1
; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v0, 0, s[4:5]
-; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc
-; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
; GCN-IR-NEXT: s_cbranch_execz .LBB13_6
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2
diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll
index 1c50f930facba..3f5be80b1efbd 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -912,12 +912,11 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_addc_u32 s9, 0, -1
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[8:9], 63
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63
-; GCN-IR-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11]
-; GCN-IR-NEXT: s_and_b64 s[6:7], s[10:11], exec
+; GCN-IR-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
+; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7]
+; GCN-IR-NEXT: s_and_b64 s[6:7], s[6:7], exec
+; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
; GCN-IR-NEXT: s_cselect_b32 s6, 0, 24
-; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
-; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11]
; GCN-IR-NEXT: s_mov_b32 s7, 0
; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
@@ -1083,13 +1082,11 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) {
; GCN-IR-NEXT: v_addc_u32_e64 v5, s[6:7], 0, -1, vcc
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[4:5]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[4:5]
; GCN-IR-NEXT: v_mov_b32_e32 v3, 0x8000
; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc
+; GCN-IR-NEXT: v_mov_b32_e32 v2, 0
; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[4:5]
; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GCN-IR-NEXT: v_mov_b32_e32 v2, 0
-; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-IR-NEXT: s_cbranch_execz .LBB9_6
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
@@ -1173,12 +1170,10 @@ define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) {
; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[4:5]
; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[4:5]
-; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1
; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[4:5]
+; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1
; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[4:5]
-; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc
-; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
; GCN-IR-NEXT: s_cbranch_execz .LBB10_6
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v4
@@ -1277,13 +1272,12 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_subb_u32 s9, 0, 0
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[6:7], s[8:9], 63
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63
; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
-; GCN-IR-NEXT: s_and_b64 s[6:7], s[4:5], exec
-; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3
+; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], exec
+; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
; GCN-IR-NEXT: s_cselect_b32 s6, 0, s2
-; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13]
-; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5]
+; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3
; GCN-IR-NEXT: s_mov_b64 s[4:5], 0
; GCN-IR-NEXT: s_cbranch_vccz .LBB11_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
@@ -1372,12 +1366,10 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) {
; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[4:5]
; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[4:5]
-; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1
; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[4:5]
+; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1
; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[4:5]
-; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc
-; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
; GCN-IR-NEXT: s_cbranch_execz .LBB12_6
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v4
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
index 28e6627b87413..b6608b9f48a7a 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -926,12 +926,11 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_addc_u32 s9, 0, -1
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[8:9], 63
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63
-; GCN-IR-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11]
-; GCN-IR-NEXT: s_and_b64 s[6:7], s[10:11], exec
+; GCN-IR-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
+; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7]
+; GCN-IR-NEXT: s_and_b64 s[6:7], s[6:7], exec
+; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
; GCN-IR-NEXT: s_cselect_b32 s6, 0, 24
-; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
-; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11]
; GCN-IR-NEXT: s_mov_b32 s7, 0
; GCN-IR-NEXT: s_cbranch_vccz .LBB6_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
@@ -1042,13 +1041,12 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x)
; GCN-IR-NEXT: s_subb_u32 s9, 0, 0
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0
; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[6:7], s[8:9], 63
-; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63
; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
-; GCN-IR-NEXT: s_and_b64 s[6:7], s[4:5], exec
-; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3
+; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], exec
+; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
; GCN-IR-NEXT: s_cselect_b32 s6, 0, s2
-; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13]
-; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5]
+; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3
; GCN-IR-NEXT: s_mov_b64 s[4:5], 0
; GCN-IR-NEXT: s_cbranch_vccz .LBB7_5
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
@@ -1219,13 +1217,11 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) {
; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3]
; GCN-IR-NEXT: v_mov_b32_e32 v4, 0x8000
; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc
+; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5]
; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
-; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-IR-NEXT: s_cbranch_execz .LBB8_6
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
@@ -1310,22 +1306,20 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) {
; GCN-IR-NEXT: v_add_i32_e64 v2, s[4:5], 32, v2
; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1
; GCN-IR-NEXT: v_min_u32_e32 v8, v2, v3
-; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 48, v8
-; GCN-IR-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5]
+; GCN-IR-NEXT: v_sub_i32_e64 v4, s[4:5], 48, v8
+; GCN-IR-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, s[4:5]
; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
-; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[2:3]
+; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[4:5]
; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[2:3]
-; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1
-; GCN-IR-NEXT: v_cndmask_b32_e64 v5, v1, 0, s[4:5]
-; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v0, 0, s[4:5]
-; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc
-; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v1, 0, s[4:5]
+; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1
+; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v0, 0, s[4:5]
+; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
; GCN-IR-NEXT: s_cbranch_execz .LBB9_6
; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1
-; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2
-; GCN-IR-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v2
+; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v4
+; GCN-IR-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc
+; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v4
; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[0:1], v2
; GCN-IR-NEXT: v_mov_b32_e32 v4, 0
; GCN-IR-NEXT: v_mov_b32_e32 v5, 0
@@ -1369,11 +1363,11 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) {
; GCN-IR-NEXT: .LBB9_5: ; %Flow4
; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
-; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v3
-; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v2
+; GCN-IR-NEXT: v_or_b32_e32 v3, v5, v3
+; GCN-IR-NEXT: v_or_b32_e32 v2, v4, v2
; GCN-IR-NEXT: .LBB9_6: ; %Flow5
; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7]
-; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[4:5], 15
+; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 15
; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
; GCN-IR-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/PowerPC/add_cmp.ll b/llvm/test/CodeGen/PowerPC/add_cmp.ll
index cbe16a498a538..c5cc071e0183d 100644
--- a/llvm/test/CodeGen/PowerPC/add_cmp.ll
+++ b/llvm/test/CodeGen/PowerPC/add_cmp.ll
@@ -30,27 +30,27 @@ entry:
define zeroext i1 @addiCmpiUnsignedOverflow(i32 zeroext %x) {
entry:
- %add = add nuw i32 110, %x
- %cmp = icmp ugt i32 %add, 100
+ %add = add nuw i32 110, %x
+ %cmp = icmp ugt i32 %add, 200
ret i1 %cmp
; CHECK: === addiCmpiUnsignedOverflow
; CHECK: Optimized lowered selection DAG: %bb.0 'addiCmpiUnsignedOverflow:entry'
; CHECK: [[REG1:t[0-9]+]]: i32 = truncate {{t[0-9]+}}
; CHECK: [[REG2:t[0-9]+]]: i32 = add nuw [[REG1]], Constant:i32<110>
-; CHECK: {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i32<100>, setugt:ch
+; CHECK: {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i32<200>, setugt:ch
}
define zeroext i1 @addiCmpiSignedOverflow(i16 signext %x) {
entry:
- %add = add nsw i16 16, %x
- %cmp = icmp sgt i16 %add, -32767
+ %add = add nsw i16 16, %x
+ %cmp = icmp sgt i16 %add, 30
ret i1 %cmp
; CHECK: === addiCmpiSignedOverflow
; CHECK: Optimized lowered selection DAG: %bb.0 'addiCmpiSignedOverflow:entry'
; CHECK: [[REG1:t[0-9]+]]: i16 = truncate {{t[0-9]+}}
; CHECK: [[REG2:t[0-9]+]]: i16 = add nsw [[REG1]], Constant:i16<16>
-; CHECK: {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i16<-32767>, setgt:ch
+; CHECK: {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i16<30>, setgt:ch
}
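The new constants keep these compares from becoming tautologies under the improved range propagation: `add nuw i32 110, %x` is at least 110, so the old `ugt 100` was always true, and `add nsw i16 16, %x` is at least -32752, so the old `sgt -32767` was always true. The unsigned case in ConstantRange terms (a sketch, not part of the patch):

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

bool oldUnsignedCmpWasTautology() {
  // add nuw 110, x => [110, 2^32), every element of which is ugt 100.
  ConstantRange Sum(APInt(32, 110), APInt(32, 0));
  return Sum.icmp(CmpInst::ICMP_UGT, ConstantRange(APInt(32, 100))); // true
}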
diff --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll
index 38f8a829cf419..2e6fa20b86954 100644
--- a/llvm/test/Transforms/Attributor/range.ll
+++ b/llvm/test/Transforms/Attributor/range.ll
@@ -888,29 +888,13 @@ define dso_local i64 @select_int2ptr_bitcast_ptr2int(i32 %a) local_unnamed_addr
; TUNIT-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int
; TUNIT-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], 5
-; TUNIT-NEXT: [[DOT:%.*]] = select i1 [[CMP]], i32 1, i32 2
-; TUNIT-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[A]], 10
-; TUNIT-NEXT: [[Y_0_V:%.*]] = select i1 [[CMP1]], i32 1, i32 2
-; TUNIT-NEXT: [[Y_0:%.*]] = add nuw nsw i32 [[DOT]], [[Y_0_V]]
-; TUNIT-NEXT: [[CMP6:%.*]] = icmp eq i32 [[Y_0]], 5
-; TUNIT-NEXT: [[I2P:%.*]] = inttoptr i1 [[CMP6]] to ptr
-; TUNIT-NEXT: [[P2I:%.*]] = ptrtoint ptr [[I2P]] to i64
-; TUNIT-NEXT: ret i64 [[P2I]]
+; TUNIT-NEXT: ret i64 0
;
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; CGSCC-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int
; CGSCC-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
; CGSCC-NEXT: entry:
-; CGSCC-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], 5
-; CGSCC-NEXT: [[DOT:%.*]] = select i1 [[CMP]], i32 1, i32 2
-; CGSCC-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[A]], 10
-; CGSCC-NEXT: [[Y_0_V:%.*]] = select i1 [[CMP1]], i32 1, i32 2
-; CGSCC-NEXT: [[Y_0:%.*]] = add nuw nsw i32 [[DOT]], [[Y_0_V]]
-; CGSCC-NEXT: [[CMP6:%.*]] = icmp eq i32 [[Y_0]], 5
-; CGSCC-NEXT: [[I2P:%.*]] = inttoptr i1 [[CMP6]] to ptr
-; CGSCC-NEXT: [[P2I:%.*]] = ptrtoint ptr [[I2P]] to i64
-; CGSCC-NEXT: ret i64 [[P2I]]
+; CGSCC-NEXT: ret i64 0
;
entry:
%cmp = icmp sgt i32 %a, 5
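The Attributor can now fold this whole function: each select is 1 or 2, so the `add nuw nsw` lies in [2, 4] and never equals 5; the i1 is false, `inttoptr` of false is null, and the `ptrtoint` result is 0. The bound as a ConstantRange computation (illustrative only):

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

bool sumNeverEqualsFive() {
  ConstantRange Sel(APInt(32, 1), APInt(32, 3)); // {1, 2}
  return !Sel.add(Sel).contains(APInt(32, 5));   // [2, 4] excludes 5
}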
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index aa68dfb540064..9d19ff1d37c26 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -3274,9 +3274,7 @@ define <2 x i32> @dec_zext_add_nonzero_vec_poison1(<2 x i8> %x) {
define <2 x i32> @dec_zext_add_nonzero_vec_poison2(<2 x i8> %x) {
; CHECK-LABEL: @dec_zext_add_nonzero_vec_poison2(
; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 8)
-; CHECK-NEXT: [[A:%.*]] = add nsw <2 x i8> [[O]], splat (i8 -1)
-; CHECK-NEXT: [[B:%.*]] = zext <2 x i8> [[A]] to <2 x i32>
-; CHECK-NEXT: [[C:%.*]] = add nuw nsw <2 x i32> [[B]], <i32 1, i32 poison>
+; CHECK-NEXT: [[C:%.*]] = zext <2 x i8> [[O]] to <2 x i32>
; CHECK-NEXT: ret <2 x i32> [[C]]
;
%o = or <2 x i8> %x, <i8 8, i8 8>
diff --git a/llvm/test/Transforms/InstCombine/fls.ll b/llvm/test/Transforms/InstCombine/fls.ll
index 68bc0a2fc8a1d..ea757268259f5 100644
--- a/llvm/test/Transforms/InstCombine/fls.ll
+++ b/llvm/test/Transforms/InstCombine/fls.ll
@@ -33,7 +33,7 @@ define i32 @flsnotconst(i64 %z) {
; CHECK-LABEL: @flsnotconst(
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[Z:%.*]], i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw nsw i64 [[CTLZ]] to i32
-; CHECK-NEXT: [[GOO:%.*]] = sub nsw i32 64, [[TMP1]]
+; CHECK-NEXT: [[GOO:%.*]] = sub nuw nsw i32 64, [[TMP1]]
; CHECK-NEXT: ret i32 [[GOO]]
;
%goo = call i32 @flsl(i64 %z)
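The extra `nuw` falls out of the recursive sub handling: `ctlz.i64` returns a value in [0, 64], the `trunc nuw nsw` preserves that, and subtracting it from 64 keeps the result in [0, 64], so the subtraction can wrap neither signed nor unsigned. Roughly (a sketch under these assumptions):

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

ConstantRange flsResultRange() {
  ConstantRange Ctlz(APInt(32, 0), APInt(32, 65)); // range(i64 0, 65), truncated
  return ConstantRange(APInt(32, 64)).sub(Ctlz);   // [0, 64]: both nuw and nsw hold
}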
diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll
index 85d01b1786cc9..486da0bd2b4d5 100644
--- a/llvm/test/Transforms/InstCombine/icmp-add.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-add.ll
@@ -3160,7 +3160,8 @@ define i1 @icmp_add_constant_with_constant_ult_to_slt_neg2(i8 range(i8 -4, 120)
}
; Negative test: C2 is negative
-define i1 @icmp_add_constant_with_constant_ult_to_slt_neg3(i32 range(i32 -4, 10) %x) {
+; Use the range [-10, 10) to keep the compare from being constant-folded.
+define i1 @icmp_add_constant_with_constant_ult_to_slt_neg3(i32 range(i32 -10, 10) %x) {
; CHECK-LABEL: @icmp_add_constant_with_constant_ult_to_slt_neg3(
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X:%.*]], 4
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], -6
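Widening the range is what keeps the negative test alive: with x in [-4, 10), `add nsw 4` gives [0, 14), and every such value is unsigned-less-than -6, so the improved analysis would fold the compare to true; with x in [-10, 10) the sum is [-6, 14), which straddles the unsigned boundary. Checking the old behavior (illustrative):

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

bool narrowRangeFoldsToTrue() {
  ConstantRange Narrow(APInt(32, -4, /*isSigned=*/true), APInt(32, 10));
  ConstantRange Sum = Narrow.add(ConstantRange(APInt(32, 4))); // [0, 14)
  return Sum.icmp(CmpInst::ICMP_ULT,
                  ConstantRange(APInt(32, -6, /*isSigned=*/true))); // true
}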
diff --git a/llvm/test/Transforms/InstCombine/pr80597.ll b/llvm/test/Transforms/InstCombine/pr80597.ll
index 148da056486f9..bf536b9ecd133 100644
--- a/llvm/test/Transforms/InstCombine/pr80597.ll
+++ b/llvm/test/Transforms/InstCombine/pr80597.ll
@@ -5,14 +5,9 @@ define i64 @pr80597(i1 %cond) {
; CHECK-LABEL: define i64 @pr80597(
; CHECK-SAME: i1 [[COND:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ADD:%.*]] = select i1 [[COND]], i64 0, i64 -12884901888
-; CHECK-NEXT: [[SEXT1:%.*]] = add nsw i64 [[ADD]], 8836839514384105472
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[SEXT1]], -34359738368
-; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 true, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.else:
-; CHECK-NEXT: [[SEXT2:%.*]] = ashr exact i64 [[ADD]], 1
-; CHECK-NEXT: [[ASHR:%.*]] = or disjoint i64 [[SEXT2]], 4418419761487020032
-; CHECK-NEXT: ret i64 [[ASHR]]
+; CHECK-NEXT: ret i64 poison
; CHECK: if.then:
; CHECK-NEXT: ret i64 0
;
diff --git a/llvm/test/Transforms/InstCombine/sadd_sat.ll b/llvm/test/Transforms/InstCombine/sadd_sat.ll
index 6afb77d975b8c..3143d4addecc1 100644
--- a/llvm/test/Transforms/InstCombine/sadd_sat.ll
+++ b/llvm/test/Transforms/InstCombine/sadd_sat.ll
@@ -824,11 +824,11 @@ entry:
define i16 @or(i8 %X, i16 %Y) {
; CHECK-LABEL: @or(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[Y:%.*]] to i8
-; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], -16
-; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[TMP2]])
-; CHECK-NEXT: [[L12:%.*]] = sext i8 [[TMP3]] to i16
-; CHECK-NEXT: ret i16 [[L12]]
+; CHECK-NEXT: [[L12:%.*]] = sext i8 [[TMP3:%.*]] to i16
+; CHECK-NEXT: [[CONV14:%.*]] = or i16 [[Y:%.*]], -16
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i16 [[L12]], [[CONV14]]
+; CHECK-NEXT: [[L13:%.*]] = call i16 @llvm.smin.i16(i16 [[SUB]], i16 127)
+; CHECK-NEXT: ret i16 [[L13]]
;
%conv10 = sext i8 %X to i16
%conv14 = or i16 %Y, 65520
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index efa89db4af61a..dff1f09213864 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -1111,8 +1111,7 @@ define <3 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat1_poison(<3 x i8> %a) {
; Can be optimized if the add nuw RHS constant range handles non-splat vectors.
define <2 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat2(<2 x i8> %a) {
; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov_nonsplat2(
-; CHECK-NEXT: [[B:%.*]] = add nuw <2 x i8> [[A:%.*]], <i8 10, i8 9>
-; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> splat (i8 9))
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A:%.*]], <i8 1, i8 0>
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%b = add nuw <2 x i8> %a, <i8 10, i8 9>
@@ -1188,7 +1187,7 @@ define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat2(<2 x i8> %a, <2 x i8>
; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_nonsplat2(
; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], <i8 7, i8 8>
; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 7)
-; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[AA]], <2 x i8> [[BB]])
+; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> [[AA]], [[BB]]
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%aa = add nsw <2 x i8> %a, <i8 7, i8 8>
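The `ssub.sat` degenerates to `sub nsw` because the operand ranges rule out signed overflow: in lane 0, `add nsw 7` keeps aa in [-121, 127] and `and 7` keeps bb in [0, 7], so aa - bb stays inside [-128, 127]. Lane 0 as a sketch (names illustrative):

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

bool lane0DiffFitsInI8() {
  ConstantRange AA(APInt(8, -121, /*isSigned=*/true),
                   APInt(8, -128, /*isSigned=*/true)); // [-121, 127]
  ConstantRange BB(APInt(8, 0), APInt(8, 8));          // [0, 7]
  // min diff: -121 - 7 = -128; max diff: 127 - 0 = 127.
  return AA.sub(BB).isFullSet(); // exactly [-128, 127], never out of range
}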
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp
index 2ee45dccc6595..b872bbb28bf7e 100644
--- a/llvm/unittests/Analysis/ValueTrackingTest.cpp
+++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp
@@ -3486,6 +3486,21 @@ TEST_F(ValueTrackingTest, ComputeConstantRange) {
EXPECT_EQ(CR.getSignedMin().getSExtValue(), -3);
EXPECT_EQ(CR.getSignedMax().getSExtValue(), 0);
}
+ {
+ auto M = parseModule(R"(
+ define i32 @test(i8 %x, i8 %y) {
+ %ext.x = zext i8 %x to i32
+ %ext.y = zext i8 %y to i32
+ %or = or disjoint i32 %ext.x, %ext.y
+ ret i32 %or
+ })");
+ Function *F = M->getFunction("test");
+ AssumptionCache AC(*F);
+ Instruction *Or = &findInstructionByName(F, "or");
+ ConstantRange CR = computeConstantRange(Or, false, true, &AC, Or);
+ EXPECT_EQ(CR.getUnsignedMin().getZExtValue(), 0u);
+ EXPECT_EQ(CR.getUnsignedMax().getZExtValue(), 510u);
+ }
}
struct FindAllocaForValueTestParams {
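The bounds in the new unit test are straightforward interval arithmetic: each `zext i8` operand lies in [0, 256), and an `or disjoint` of values with no common set bits behaves like an add, so the result is bounded by 255 + 255 = 510. The same computation done directly on ConstantRange (illustrative, not part of the patch):

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

ConstantRange orDisjointBound() {
  ConstantRange Byte(APInt(32, 0), APInt(32, 256)); // zext i8 -> i32
  return Byte.add(Byte); // [0, 511), i.e. unsigned max 510
}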