[llvm] AMDGPU: Strip sign bit operations on llvm.amdgcn.trig.preop uses (PR #179712)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 4 09:22:43 PST 2026
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/179712
The instruction ignores the sign bit, so we can look through sign-only
operations to find the underlying magnitude value. The real library use
applies a fabs to the input, which this fold strips.
stripSignOnlyFPOps should probably go directly into PatternMatch in some
form.
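For illustration, the basic fold on IR (a minimal sketch mirroring the new
trig_preop_strip_fabs test in the patch; %val and %idx are just the test's
argument names):

  ; Before: the fabs only changes the sign bit, which trig.preop ignores.
  %fabs = call double @llvm.fabs.f64(double %val)
  %result = call double @llvm.amdgcn.trig.preop.f64(double %fabs, i32 %idx)

  ; After: the operand is replaced with the underlying magnitude value.
  %result = call double @llvm.amdgcn.trig.preop.f64(double %val, i32 %idx)

The same applies to fneg and copysign sources, and the fold still fires when
the fabs has other uses, since only the intrinsic's operand is replaced.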
From f03da7f96de45c2b2662c33c9f2eecf2c47e12a1 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 31 Jan 2026 16:24:32 +0100
Subject: [PATCH] AMDGPU: Strip sign bit operations on llvm.amdgcn.trig.preop
uses
The instruction ignores the sign bit, so we can look through sign-only
operations to find the underlying magnitude value. The real library use
applies a fabs to the input, which this fold strips.
stripSignOnlyFPOps should probably go directly into PatternMatch in some
form.
---
.../Transforms/InstCombine/InstCombiner.h | 11 ++++
.../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 5 ++
.../InstCombine/InstCombineAndOrXor.cpp | 9 ---
.../InstCombine/AMDGPU/amdgcn-intrinsics.ll | 64 +++++++++++++++++++
4 files changed, 80 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index d6c2d7fc48bda..1563c9319df87 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -331,6 +331,17 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
return ConstantVector::get(Out);
}
+ /// Ignore all operations which only change the sign of a value, returning the
+ /// underlying magnitude value.
+ static Value *stripSignOnlyFPOps(Value *Val) {
+ using namespace llvm::PatternMatch;
+
+ match(Val, m_FNeg(m_Value(Val)));
+ match(Val, m_FAbs(m_Value(Val)));
+ match(Val, m_CopySign(m_Value(Val), m_Value()));
+ return Val;
+ }
+
void addToWorklist(Instruction *I) { Worklist.push(I); }
AssumptionCache &getAssumptionCache() const { return AC; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 467236e57863a..79430a1a866d0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1462,6 +1462,11 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (isa<UndefValue>(Segment))
return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));
+ // Sign bit is not used.
+ Value *StrippedSign = InstCombiner::stripSignOnlyFPOps(Src);
+ if (StrippedSign != Src)
+ return IC.replaceOperand(II, 0, StrippedSign);
+
if (II.isStrictFP())
break;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b23519fd9f77f..fde94c7616e79 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1409,15 +1409,6 @@ Value *InstCombinerImpl::foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1,
return Builder.CreateICmp(NewPred, NewV, ConstantInt::get(Ty, NewC));
}
-/// Ignore all operations which only change the sign of a value, returning the
-/// underlying magnitude value.
-static Value *stripSignOnlyFPOps(Value *Val) {
- match(Val, m_FNeg(m_Value(Val)));
- match(Val, m_FAbs(m_Value(Val)));
- match(Val, m_CopySign(m_Value(Val), m_Value()));
- return Val;
-}
-
/// Matches canonical form of isnan, fcmp ord x, 0
static bool matchIsNotNaN(FCmpInst::Predicate P, Value *LHS, Value *RHS) {
return P == FCmpInst::FCMP_ORD && match(RHS, m_AnyZeroFP());
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 45e7896aaa7b7..c1621069abf71 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -5980,6 +5980,70 @@ define double @trig_preop_constfold_exponent1968_mantissaX__outbound_segment() {
ret double %val
}
+define double @trig_preop_strip_fabs(double %val, i32 %idx) {
+; CHECK-LABEL: @trig_preop_strip_fabs(
+; CHECK-NEXT: [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL:%.*]], i32 [[IDX:%.*]])
+; CHECK-NEXT: ret double [[RESULT]]
+;
+ %fabs = call double @llvm.fabs.f64(double %val)
+ %result = call double @llvm.amdgcn.trig.preop.f64(double %fabs, i32 %idx)
+ ret double %result
+}
+
+define double @trig_preop_strip_fabs_multi_use(double %val, i32 %idx, ptr %ptr) {
+; CHECK-LABEL: @trig_preop_strip_fabs_multi_use(
+; CHECK-NEXT: [[FABS:%.*]] = call double @llvm.fabs.f64(double [[VAL:%.*]])
+; CHECK-NEXT: store double [[FABS]], ptr [[PTR:%.*]], align 8
+; CHECK-NEXT: [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL]], i32 [[IDX:%.*]])
+; CHECK-NEXT: ret double [[RESULT]]
+;
+ %fabs = call double @llvm.fabs.f64(double %val)
+ store double %fabs, ptr %ptr
+ %result = call double @llvm.amdgcn.trig.preop.f64(double %fabs, i32 %idx)
+ ret double %result
+}
+
+define double @trig_preop_strip_fneg(double %val, i32 %idx) {
+; CHECK-LABEL: @trig_preop_strip_fneg(
+; CHECK-NEXT: [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL:%.*]], i32 [[IDX:%.*]])
+; CHECK-NEXT: ret double [[RESULT]]
+;
+ %fneg = fneg double %val
+ %result = call double @llvm.amdgcn.trig.preop.f64(double %fneg, i32 %idx)
+ ret double %result
+}
+
+define double @trig_preop_strip_fneg_fabs(double %val, i32 %idx) {
+; CHECK-LABEL: @trig_preop_strip_fneg_fabs(
+; CHECK-NEXT: [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL:%.*]], i32 [[IDX:%.*]])
+; CHECK-NEXT: ret double [[RESULT]]
+;
+ %fabs = call double @llvm.fabs.f64(double %val)
+ %fneg.fabs = fneg double %fabs
+ %result = call double @llvm.amdgcn.trig.preop.f64(double %fneg.fabs, i32 %idx)
+ ret double %result
+}
+
+define double @trig_preop_strip_copysign(double %mag, double %sign, i32 %idx) {
+; CHECK-LABEL: @trig_preop_strip_copysign(
+; CHECK-NEXT: [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[MAG:%.*]], i32 [[IDX:%.*]])
+; CHECK-NEXT: ret double [[RESULT]]
+;
+ %copysign = call double @llvm.copysign.f64(double %mag, double %sign)
+ %result = call double @llvm.amdgcn.trig.preop.f64(double %copysign, i32 %idx)
+ ret double %result
+}
+
+define double @trig_preop_strip_fabs_strictfp(double %val, i32 %idx) strictfp {
+; CHECK-LABEL: @trig_preop_strip_fabs_strictfp(
+; CHECK-NEXT: [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL:%.*]], i32 [[IDX:%.*]]) #[[ATTR20]]
+; CHECK-NEXT: ret double [[RESULT]]
+;
+ %fabs = call double @llvm.fabs.f64(double %val)
+ %result = call double @llvm.amdgcn.trig.preop.f64(double %fabs, i32 %idx) strictfp
+ ret double %result
+}
+
; --------------------------------------------------------------------
; llvm.amdgcn.log
; --------------------------------------------------------------------