[llvm] 79bf8c0 - [InstCombine] Fold select(X >s 0, 0, -X) | smax(X, 0) to abs(X) (#165200)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 2 15:38:26 PST 2025
Author: Wenju He
Date: 2025-11-03T07:38:21+08:00
New Revision: 79bf8c0331bc7e3655e208da7ad8aa780105ac82
URL: https://github.com/llvm/llvm-project/commit/79bf8c0331bc7e3655e208da7ad8aa780105ac82
DIFF: https://github.com/llvm/llvm-project/commit/79bf8c0331bc7e3655e208da7ad8aa780105ac82.diff
LOG: [InstCombine] Fold select(X >s 0, 0, -X) | smax(X, 0) to abs(X) (#165200)
The IR pattern is compiled from OpenCL code:
__builtin_astype(x > (uchar2)(0) ? x : -x, uchar2);
where smax is created by foldSelectInstWithICmp + canonicalizeSPF.
smax could also come from direct elementwise max call:
int c = b > (int)(0) ? (int)(0) : -b;
int d = __builtin_elementwise_max(b, (int)(0));
*a = c | d;
https://alive2.llvm.org/ce/z/2-brvr
https://alive2.llvm.org/ce/z/Dowjzk
https://alive2.llvm.org/ce/z/kathwZ
---------
Co-authored-by: Yingwei Zheng <dtcxzyw at qq.com>
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
Added:
Modified:
clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
llvm/test/Transforms/InstCombine/or.ll
Removed:
################################################################################
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
index d5d15b4dea966..35fde8733f375 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
@@ -3584,13 +3584,13 @@ void test_integer(void) {
// CHECK-ASM: vsrlb
vsc = vec_abs(vsc);
- // CHECK-ASM: vlcb
+ // CHECK-ASM: vlpb
vss = vec_abs(vss);
- // CHECK-ASM: vlch
+ // CHECK-ASM: vlph
vsi = vec_abs(vsi);
- // CHECK-ASM: vlcf
+ // CHECK-ASM: vlpf
vsl = vec_abs(vsl);
- // CHECK-ASM: vlcg
+ // CHECK-ASM: vlpg
vsc = vec_max(vsc, vsc);
// CHECK-ASM: vmxb
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c
index 6ee9e1ee3a117..cd0fafdb7435f 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c
@@ -246,7 +246,7 @@ void test_integer(void) {
// CHECK-ASM: vctzq
vslll = vec_abs(vslll);
- // CHECK-ASM: vlcq
+ // CHECK-ASM: vlpq
vslll = vec_avg(vslll, vslll);
// CHECK: call i128 @llvm.s390.vavgq(i128 %{{.*}}, i128 %{{.*}})
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3ddf182149e57..cbaff294819a2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3997,6 +3997,27 @@ static Value *foldOrUnsignedUMulOverflowICmp(BinaryOperator &I,
return nullptr;
}
+/// Fold select(X >s 0, 0, -X) | smax(X, 0) --> abs(X)
+/// select(X <s 0, -X, 0) | smax(X, 0) --> abs(X)
+static Value *FoldOrOfSelectSmaxToAbs(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *X;
+ Value *Sel;
+ if (match(&I,
+ m_c_Or(m_Value(Sel), m_OneUse(m_SMax(m_Value(X), m_ZeroInt()))))) {
+ auto NegX = m_Neg(m_Specific(X));
+ if (match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(X),
+ m_ZeroInt()),
+ m_ZeroInt(), NegX)) ||
+ match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(X),
+ m_ZeroInt()),
+ NegX, m_ZeroInt())))
+ return Builder.CreateBinaryIntrinsic(Intrinsic::abs, X,
+ Builder.getFalse());
+ }
+ return nullptr;
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -4545,6 +4566,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyAddWithRemainder(I))
return replaceInstUsesWith(I, V);
+ if (Value *Res = FoldOrOfSelectSmaxToAbs(I, Builder))
+ return replaceInstUsesWith(I, Res);
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index 6b090e982af0a..f61a1970d3aa4 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -2113,3 +2113,98 @@ define <4 x i32> @or_zext_nneg_minus_constant_splat(<4 x i8> %a) {
%or = or <4 x i32> %zext, splat (i32 -9)
ret <4 x i32> %or
}
+
+define i8 @or_positive_minus_non_positive_to_abs(i8 %a){
+; CHECK-LABEL: @or_positive_minus_non_positive_to_abs(
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.abs.i8(i8 [[A:%.*]], i1 false)
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %b = icmp sgt i8 %a, 0
+ %mask = sext i1 %b to i8
+ %neg = sub i8 0, %a
+ %mask_inv = xor i8 %mask, -1
+ %c = and i8 %neg, %mask_inv
+ %d = and i8 %a, %mask
+ %or = or i8 %c, %d
+ ret i8 %or
+}
+
+; TODO: Fold to smax https://alive2.llvm.org/ce/z/wDiDh2
+define i8 @or_select_smax_neg_to_abs(i8 %a){
+; CHECK-LABEL: @or_select_smax_neg_to_abs(
+; CHECK-NEXT: [[SGT0:%.*]] = icmp sgt i8 [[A:%.*]], 0
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i8 0, [[A]]
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[SGT0]], i8 0, i8 [[NEG]]
+; CHECK-NEXT: ret i8 [[OR]]
+;
+ %sgt0 = icmp sgt i8 %a, 0
+ %neg = sub nsw i8 0, %a
+ %sel = select i1 %sgt0, i8 0, i8 %neg
+ ret i8 %sel
+}
+
+; TODO: Fold to abs https://alive2.llvm.org/ce/z/DybfHG
+define i8 @or_select_smax_smax_to_abs(i8 %a){
+; CHECK-LABEL: @or_select_smax_smax_to_abs(
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i8 0, [[A:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = call i8 @llvm.smax.i8(i8 [[NEG]], i8 0)
+; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 0)
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[SEL]], [[MAX]]
+; CHECK-NEXT: ret i8 [[OR]]
+;
+ %neg = sub nsw i8 0, %a
+ %sel = call i8 @llvm.smax.i8(i8 %neg, i8 0)
+ %max = call i8 @llvm.smax.i8(i8 %a, i8 0)
+ %or = or i8 %sel, %max
+ ret i8 %or
+}
+
+declare i8 @llvm.abs.i8(i8, i1)
+declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1)
+
+define <2 x i8> @or_sgt_select_smax_to_abs(<2 x i8> %a){
+; CHECK-LABEL: @or_sgt_select_smax_to_abs(
+; CHECK-NEXT: [[OR:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[A:%.*]], i1 false)
+; CHECK-NEXT: ret <2 x i8> [[OR]]
+;
+ %sgt0 = icmp sgt <2 x i8> %a, zeroinitializer
+ %neg = sub <2 x i8> zeroinitializer, %a
+ %sel = select <2 x i1> %sgt0, <2 x i8> zeroinitializer, <2 x i8> %neg
+ %max = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %a, <2 x i8> zeroinitializer)
+ %or = or <2 x i8> %sel, %max
+ ret <2 x i8> %or
+}
+
+define <2 x i8> @or_slt_select_smax_to_abs(<2 x i8> %a){
+; CHECK-LABEL: @or_slt_select_smax_to_abs(
+; CHECK-NEXT: [[OR:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[A:%.*]], i1 false)
+; CHECK-NEXT: ret <2 x i8> [[OR]]
+;
+ %slt0 = icmp slt <2 x i8> %a, zeroinitializer
+ %neg = sub <2 x i8> zeroinitializer, %a
+ %sel = select <2 x i1> %slt0, <2 x i8> %neg, <2 x i8> zeroinitializer
+ %max = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %a, <2 x i8> zeroinitializer)
+ %or = or <2 x i8> %sel, %max
+ ret <2 x i8> %or
+}
+
+; negative test - %d has multiple uses. %or is not folded to abs.
+
+define <2 x i8> @or_select_smax_multi_uses(<2 x i8> %a){
+; CHECK-LABEL: @or_select_smax_multi_uses(
+; CHECK-NEXT: [[B:%.*]] = icmp sgt <2 x i8> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[A]]
+; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[B]], <2 x i8> zeroinitializer, <2 x i8> [[NEG]]
+; CHECK-NEXT: [[D:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[A]], <2 x i8> zeroinitializer)
+; CHECK-NEXT: [[OR1:%.*]] = or <2 x i8> [[C]], [[D]]
+; CHECK-NEXT: [[OR:%.*]] = add <2 x i8> [[OR1]], [[D]]
+; CHECK-NEXT: ret <2 x i8> [[OR]]
+;
+ %sgt0 = icmp sgt <2 x i8> %a, zeroinitializer
+ %neg = sub <2 x i8> zeroinitializer, %a
+ %sel = select <2 x i1> %sgt0, <2 x i8> zeroinitializer, <2 x i8> %neg
+ %max = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %a, <2 x i8> zeroinitializer)
+ %or = or <2 x i8> %sel, %max
+ %add = add <2 x i8> %or, %max
+ ret <2 x i8> %add
+}
More information about the llvm-commits
mailing list