[llvm] AMDGPU: Strip sign bit operations on llvm.amdgcn.trig.preop uses (PR #179712)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 4 09:22:43 PST 2026


https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/179712

The instruction ignores the sign bit of its input, so we can look through
sign-only operations to find the underlying magnitude source. The real
library use case feeds a fabs into this intrinsic, which this change
eliminates.

stripSignOnlyFPOps should probably go directly into PatternMatch in some
form.

>From f03da7f96de45c2b2662c33c9f2eecf2c47e12a1 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 31 Jan 2026 16:24:32 +0100
Subject: [PATCH] AMDGPU: Strip sign bit operations on llvm.amdgcn.trig.preop
 uses

The instruction ignores the sign bit, so we can find the magnitude source.
The real library use has a fabs input which this avoids.

stripSignOnlyFPOps should probably go directly into PatternMatch in some
form.
---
 .../Transforms/InstCombine/InstCombiner.h     | 11 ++++
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     |  5 ++
 .../InstCombine/InstCombineAndOrXor.cpp       |  9 ---
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll   | 64 +++++++++++++++++++
 4 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index d6c2d7fc48bda..1563c9319df87 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -331,6 +331,17 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
     return ConstantVector::get(Out);
   }
 
+  /// Ignore all operations which only change the sign of a value, returning the
+  /// underlying magnitude value.
+  static Value *stripSignOnlyFPOps(Value *Val) {
+    using namespace llvm::PatternMatch;
+
+    match(Val, m_FNeg(m_Value(Val)));
+    match(Val, m_FAbs(m_Value(Val)));
+    match(Val, m_CopySign(m_Value(Val), m_Value()));
+    return Val;
+  }
+
   void addToWorklist(Instruction *I) { Worklist.push(I); }
 
   AssumptionCache &getAssumptionCache() const { return AC; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 467236e57863a..79430a1a866d0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1462,6 +1462,11 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     if (isa<UndefValue>(Segment))
       return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));
 
+    // Sign bit is not used.
+    Value *StrippedSign = InstCombiner::stripSignOnlyFPOps(Src);
+    if (StrippedSign != Src)
+      return IC.replaceOperand(II, 0, StrippedSign);
+
     if (II.isStrictFP())
       break;
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b23519fd9f77f..fde94c7616e79 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1409,15 +1409,6 @@ Value *InstCombinerImpl::foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1,
   return Builder.CreateICmp(NewPred, NewV, ConstantInt::get(Ty, NewC));
 }
 
-/// Ignore all operations which only change the sign of a value, returning the
-/// underlying magnitude value.
-static Value *stripSignOnlyFPOps(Value *Val) {
-  match(Val, m_FNeg(m_Value(Val)));
-  match(Val, m_FAbs(m_Value(Val)));
-  match(Val, m_CopySign(m_Value(Val), m_Value()));
-  return Val;
-}
-
 /// Matches canonical form of isnan, fcmp ord x, 0
 static bool matchIsNotNaN(FCmpInst::Predicate P, Value *LHS, Value *RHS) {
   return P == FCmpInst::FCMP_ORD && match(RHS, m_AnyZeroFP());
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 45e7896aaa7b7..c1621069abf71 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -5980,6 +5980,70 @@ define double @trig_preop_constfold_exponent1968_mantissaX__outbound_segment() {
   ret double %val
 }
 
+define double @trig_preop_strip_fabs(double %val, i32 %idx) {
+; CHECK-LABEL: @trig_preop_strip_fabs(
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL:%.*]], i32 [[IDX:%.*]])
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+  %fabs = call double @llvm.fabs.f64(double %val)
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %fabs, i32 %idx)
+  ret double %result
+}
+
+define double @trig_preop_strip_fabs_multi_use(double %val, i32 %idx, ptr %ptr) {
+; CHECK-LABEL: @trig_preop_strip_fabs_multi_use(
+; CHECK-NEXT:    [[FABS:%.*]] = call double @llvm.fabs.f64(double [[VAL:%.*]])
+; CHECK-NEXT:    store double [[FABS]], ptr [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL]], i32 [[IDX:%.*]])
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+  %fabs = call double @llvm.fabs.f64(double %val)
+  store double %fabs, ptr %ptr
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %fabs, i32 %idx)
+  ret double %result
+}
+
+define double @trig_preop_strip_fneg(double %val, i32 %idx) {
+; CHECK-LABEL: @trig_preop_strip_fneg(
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL:%.*]], i32 [[IDX:%.*]])
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+  %fneg = fneg double %val
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %fneg, i32 %idx)
+  ret double %result
+}
+
+define double @trig_preop_strip_fneg_fabs(double %val, i32 %idx) {
+; CHECK-LABEL: @trig_preop_strip_fneg_fabs(
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL:%.*]], i32 [[IDX:%.*]])
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+  %fabs = call double @llvm.fabs.f64(double %val)
+  %fneg.fabs = fneg double %fabs
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %fneg.fabs, i32 %idx)
+  ret double %result
+}
+
+define double @trig_preop_strip_copysign(double %mag, double %sign, i32 %idx) {
+; CHECK-LABEL: @trig_preop_strip_copysign(
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[MAG:%.*]], i32 [[IDX:%.*]])
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+  %copysign = call double @llvm.copysign.f64(double %mag, double %sign)
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %copysign, i32 %idx)
+  ret double %result
+}
+
+define double @trig_preop_strip_fabs_strictfp(double %val, i32 %idx) strictfp {
+; CHECK-LABEL: @trig_preop_strip_fabs_strictfp(
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[VAL:%.*]], i32 [[IDX:%.*]]) #[[ATTR20]]
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+  %fabs = call double @llvm.fabs.f64(double %val)
+  %result = call double @llvm.amdgcn.trig.preop.f64(double %fabs, i32 %idx) strictfp
+  ret double %result
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.log
 ; --------------------------------------------------------------------



More information about the llvm-commits mailing list