[libcxx-commits] [libcxx] [llvm] [CIR][X86] Add support for vpshl/vpshr builtins (PR #179538)
Priyanshu Kumar via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Feb 4 09:17:36 PST 2026
https://github.com/Priyanshu3820 updated https://github.com/llvm/llvm-project/pull/179538
>From 4d2ec7de3325f3f19b80190604f9805ab5f036dd Mon Sep 17 00:00:00 2001
From: Akash Dutta <137309513+akadutta at users.noreply.github.com>
Date: Fri, 30 Jan 2026 08:26:40 -0600
Subject: [PATCH] Add support for vpshl/vpshr builtins
---
libcxx/include/stdatomic.h | 2 +-
.../include_stdatomic_as_c.sh.cpp | 30 +
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 14 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 36 +-
llvm/lib/Target/AMDGPU/SIInstructions.td | 2 +-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +-
.../InstCombineSimplifyDemanded.cpp | 30 +
.../lib/Transforms/Scalar/SimplifyCFGPass.cpp | 2 +-
...amdgpu-attributor-nocallback-intrinsics.ll | 19 +-
.../AMDGPU/amdgpu-attributor-trap-leaf.ll | 65 ++
.../AMDGPU/av_movimm_pseudo_expansion.mir | 20 +
.../AMDGPU/misaligned-vgpr-regsequence.mir | 30 +
.../test/CodeGen/AMDGPU/peephole-fold-imm.mir | 5 +-
.../siloadstoreopt-misaligned-regsequence.ll | 21 +
.../CodeGen/AMDGPU/v_mov_b64_expansion.mir | 9 +
.../CodeGen/AMDGPU/wmma-gfx12-convergent.mir | 153 ++++-
.../InstCombine/simplify-demanded-fpclass.ll | 649 ++++++++++++++++++
.../AArch64/partial-reduce-chained.ll | 31 +-
18 files changed, 1079 insertions(+), 43 deletions(-)
create mode 100644 libcxx/test/extensions/libcxx/depr/depr.c.headers/include_stdatomic_as_c.sh.cpp
create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/misaligned-vgpr-regsequence.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/siloadstoreopt-misaligned-regsequence.ll
diff --git a/libcxx/include/stdatomic.h b/libcxx/include/stdatomic.h
index 2991030eee456..e7b787560ddc4 100644
--- a/libcxx/include/stdatomic.h
+++ b/libcxx/include/stdatomic.h
@@ -231,7 +231,7 @@ using std::atomic_store_explicit _LIBCPP_USING_IF_EXISTS;
using std::atomic_signal_fence _LIBCPP_USING_IF_EXISTS;
using std::atomic_thread_fence _LIBCPP_USING_IF_EXISTS;
-# elif defined(_LIBCPP_COMPILER_CLANG_BASED)
+# elif !defined(__cplusplus) || defined(_LIBCPP_COMPILER_CLANG_BASED)
// Before C++23, we include the next <stdatomic.h> on the path to avoid hijacking
// the header. We do this because Clang has historically shipped a <stdatomic.h>
diff --git a/libcxx/test/extensions/libcxx/depr/depr.c.headers/include_stdatomic_as_c.sh.cpp b/libcxx/test/extensions/libcxx/depr/depr.c.headers/include_stdatomic_as_c.sh.cpp
new file mode 100644
index 0000000000000..31ad5c9053675
--- /dev/null
+++ b/libcxx/test/extensions/libcxx/depr/depr.c.headers/include_stdatomic_as_c.sh.cpp
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// We're building as C, so this test doesn't work when building with modules.
+// UNSUPPORTED: clang-modules-build
+
+// GCC complains about unrecognized arguments because we're compiling the
+// file as C, but we're passing C++ flags on the command-line.
+// UNSUPPORTED: gcc
+
+// Test that stdatomic.h gets the C header with its definitions.
+
+// NOTE: It's not common or recommended to have libc++ in the header search
+// path when compiling C files, but it does happen often enough.
+
+// RUN: %{cxx} -c -xc %s -fsyntax-only %{flags} %{compile_flags} -std=c99
+
+#include <stdatomic.h>
+
+int main(int argc, char** argv) {
+ (void)argc;
+ (void)argv;
+ [[maybe_unused]] atomic_bool x;
+ return 0;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 0b2ee6371da06..4bcaabfd3263a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1343,7 +1343,6 @@ struct AAAMDGPUMinAGPRAlloc
Maximum.takeAssumedMaximum(NumRegs);
return true;
}
-
switch (CB.getIntrinsicID()) {
case Intrinsic::not_intrinsic:
break;
@@ -1361,10 +1360,21 @@ struct AAAMDGPUMinAGPRAlloc
return true;
}
+ // Trap-like intrinsics such as llvm.trap and llvm.debugtrap do not have
+ // the nocallback attribute, so the AMDGPU attributor can conservatively
+ // drop all implicitly-known inputs and AGPR allocation information. Make
+ // sure we still infer that no implicit inputs are required and that the
+ // AGPR allocation stays at zero. Trap-like intrinsics may invoke a
+ // function which requires AGPRs, so we need to check if the called
+ // function has the "trap-func-name" attribute.
+ case Intrinsic::trap:
+ case Intrinsic::debugtrap:
+ case Intrinsic::ubsantrap:
+ return CB.hasFnAttr(Attribute::NoCallback) ||
+ !CB.hasFnAttr("trap-func-name");
default:
// Some intrinsics may use AGPRs, but if we have a choice, we are not
// required to use AGPRs.
-
// Assume !nocallback intrinsics may call a function which requires
// AGPRs.
return CB.hasFnAttr(Attribute::NoCallback);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index fb82598709aab..e0e0bb0c05ea2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2173,11 +2173,14 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+ const MCInstrDesc &Mov64Desc = get(AMDGPU::V_MOV_B64_e32);
+ const TargetRegisterClass *Mov64RC = getRegClass(Mov64Desc, /*OpNum=*/0);
+
const MachineOperand &SrcOp = MI.getOperand(1);
// FIXME: Will this work for 64-bit floating point immediates?
assert(!SrcOp.isFPImm());
- if (ST.hasMovB64()) {
- MI.setDesc(get(AMDGPU::V_MOV_B64_e32));
+ if (ST.hasMovB64() && Mov64RC->contains(Dst)) {
+ MI.setDesc(Mov64Desc);
if (SrcOp.isReg() || isInlineConstant(MI, 1) ||
isUInt<32>(SrcOp.getImm()) || ST.has64BitLiterals())
break;
@@ -2186,17 +2189,21 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
APInt Imm(64, SrcOp.getImm());
APInt Lo(32, Imm.getLoBits(32).getZExtValue());
APInt Hi(32, Imm.getHiBits(32).getZExtValue());
- if (ST.hasPkMovB32() && Lo == Hi && isInlineConstant(Lo)) {
- BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst)
- .addImm(SISrcMods::OP_SEL_1)
- .addImm(Lo.getSExtValue())
- .addImm(SISrcMods::OP_SEL_1)
- .addImm(Lo.getSExtValue())
- .addImm(0) // op_sel_lo
- .addImm(0) // op_sel_hi
- .addImm(0) // neg_lo
- .addImm(0) // neg_hi
- .addImm(0); // clamp
+ const MCInstrDesc &PkMovDesc = get(AMDGPU::V_PK_MOV_B32);
+ const TargetRegisterClass *PkMovRC = getRegClass(PkMovDesc, /*OpNum=*/0);
+
+ if (ST.hasPkMovB32() && Lo == Hi && isInlineConstant(Lo) &&
+ PkMovRC->contains(Dst)) {
+ BuildMI(MBB, MI, DL, PkMovDesc, Dst)
+ .addImm(SISrcMods::OP_SEL_1)
+ .addImm(Lo.getSExtValue())
+ .addImm(SISrcMods::OP_SEL_1)
+ .addImm(Lo.getSExtValue())
+ .addImm(0) // op_sel_lo
+ .addImm(0) // op_sel_hi
+ .addImm(0) // neg_lo
+ .addImm(0) // neg_hi
+ .addImm(0); // clamp
} else {
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
.addImm(Lo.getSExtValue())
@@ -5259,7 +5266,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
// aligned register constraint.
// FIXME: We do not verify inline asm operands, but custom inline asm
// verification is broken anyway
- if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
+ if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO &&
+ Opcode != AMDGPU::V_MOV_B64_PSEUDO) {
const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg);
if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
if (const TargetRegisterClass *SubRC =
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 604a91fcb6e7f..d30e7fd0523a5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -131,7 +131,7 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst),
// 64-bit vector move instruction. This is mainly used by the
// SIFoldOperands pass to enable folding of inline immediates.
-def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64_AlignTarget:$vdst),
+def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
(ins VSrc_b64:$src0)> {
let isReMaterializable = 1;
let isAsCheapAsAMove = 1;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 405e5dddba639..d111b8996ae75 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1397,13 +1397,13 @@ multiclass WMMAInst<string Suffix, string Instr, VOPProfile P, SDPatternOperator
defvar WMMAConstraints3Addr = "@earlyclobber $vdst";
defvar WMMAProfile = VOPProfileWMMA<P, Suffix, _Src01RC64, Type.hasClamp, Type.hasOpsel>;
- let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
+ let isConvergent = 1, Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = convertibleTo3Addr in {
def _twoaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
}
}
if convertibleTo3Addr then {
- let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
+ let isConvergent = 1, Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in {
def _threeaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 7b82613c73ea6..630016fe58a19 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -2143,6 +2143,32 @@ static Value *simplifyDemandedFPClassFnegFabs(KnownFPClass &Known, Value *Src,
return nullptr;
}
+static Value *simplifyDemandedFPClassCopysignMag(Value *MagSrc,
+ FPClassTest DemandedMask,
+ KnownFPClass KnownSrc,
+ bool NSZ) {
+ if (NSZ) {
+ constexpr FPClassTest NegOrZero = fcNegative | fcPosZero;
+ constexpr FPClassTest PosOrZero = fcPositive | fcNegZero;
+
+ if ((DemandedMask & ~NegOrZero) == fcNone &&
+ KnownSrc.isKnownAlways(NegOrZero))
+ return MagSrc;
+
+ if ((DemandedMask & ~PosOrZero) == fcNone &&
+ KnownSrc.isKnownAlways(PosOrZero))
+ return MagSrc;
+ } else {
+ if ((DemandedMask & ~fcNegative) == fcNone && KnownSrc.SignBit == true)
+ return MagSrc;
+
+ if ((DemandedMask & ~fcPositive) == fcNone && KnownSrc.SignBit == false)
+ return MagSrc;
+ }
+
+ return nullptr;
+}
+
static Value *
simplifyDemandedFPClassMinMax(KnownFPClass &Known, Intrinsic::ID IID,
const CallInst *CI, FPClassTest DemandedMask,
@@ -2764,6 +2790,10 @@ Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Instruction *I,
return I;
}
+ if (Value *Simplified = simplifyDemandedFPClassCopysignMag(
+ CI->getArgOperand(0), DemandedMask, Known, FMF.noSignedZeros()))
+ return Simplified;
+
KnownFPClass KnownSign = computeKnownFPClass(CI->getArgOperand(1),
fcAllFlags, CxtI, Depth + 1);
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 7ffccf73bd39d..edcdaea4c31da 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -244,7 +244,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
unsigned IterCnt = 0;
(void)IterCnt;
while (LocalChange) {
- assert(IterCnt++ < 1000 && "Iterative simplification didn't converge!");
+ assert(IterCnt++ < 2000 && "Iterative simplification didn't converge!");
LocalChange = false;
// Loop over all of the basic blocks and remove them if they are unneeded.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
index 71c509afa8e64..163f1aa72e11f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
@@ -35,7 +35,7 @@ define void @use_assume(i1 %arg) {
define void @use_trap() {
; CHECK-LABEL: define void @use_trap(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: ret void
;
@@ -43,9 +43,19 @@ define void @use_trap() {
ret void
}
+define void @use_trap_with_handler() {
+; CHECK-LABEL: define void @use_trap_with_handler(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.trap() #[[ATTR7:[0-9]+]]
+; CHECK-NEXT: ret void
+;
+ call void @llvm.trap() #0
+ ret void
+}
+
define void @use_debugtrap() {
; CHECK-LABEL: define void @use_debugtrap(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @llvm.debugtrap()
; CHECK-NEXT: ret void
;
@@ -55,7 +65,7 @@ define void @use_debugtrap() {
define void @use_ubsantrap() {
; CHECK-LABEL: define void @use_ubsantrap(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
; CHECK-NEXT: ret void
;
@@ -63,6 +73,8 @@ define void @use_ubsantrap() {
ret void
}
+
+attributes #0 = { "trap-func-name"="handler" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
@@ -71,4 +83,5 @@ define void @use_ubsantrap() {
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR7]] = { "trap-func-name"="handler" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
new file mode 100644
index 0000000000000..e5e7328890146
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
+
+; Trap-like intrinsics such as llvm.trap and llvm.debugtrap do not have the
+; nocallback attribute, so the AMDGPU attributor used to conservatively drop
+; all implicitly-known inputs and AGPR allocation information. Make sure we
+; still infer that no implicit inputs are required and that the AGPR allocation
+; stays at zero.
+
+declare void @llvm.trap()
+
+declare void @llvm.debugtrap()
+
+define amdgpu_kernel void @trap_kernel() {
+; CHECK-LABEL: define amdgpu_kernel void @trap_kernel(
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.trap()
+ ret void
+}
+
+define amdgpu_kernel void @trap_kernel_with_handler() {
+; CHECK-LABEL: define amdgpu_kernel void @trap_kernel_with_handler(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.trap() #[[ATTR4:[0-9]+]]
+; CHECK-NEXT: ret void
+;
+ call void @llvm.trap() #0
+ ret void
+}
+
+define amdgpu_kernel void @debugtrap_kernel() {
+; CHECK-LABEL: define amdgpu_kernel void @debugtrap_kernel(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT: call void @llvm.debugtrap()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.debugtrap()
+ ret void
+}
+
+; Test that a trap with both trap-func-name and nocallback is still safe
+define amdgpu_kernel void @trap_kernel_with_handler_and_nocallback() {
+; CHECK-LABEL: define amdgpu_kernel void @trap_kernel_with_handler_and_nocallback(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT: call void @llvm.trap() #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: ret void
+;
+ call void @llvm.trap() #1
+ ret void
+}
+
+attributes #0 = { "trap-func-name"="handler" }
+attributes #1 = { nocallback "trap-func-name"="handler" }
+
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "trap-func-name"="handler" }
+; CHECK: attributes #[[ATTR5]] = { nocallback "trap-func-name"="handler" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/av_movimm_pseudo_expansion.mir b/llvm/test/CodeGen/AMDGPU/av_movimm_pseudo_expansion.mir
index c52347b680371..d08185a9e0ccd 100644
--- a/llvm/test/CodeGen/AMDGPU/av_movimm_pseudo_expansion.mir
+++ b/llvm/test/CodeGen/AMDGPU/av_movimm_pseudo_expansion.mir
@@ -208,3 +208,23 @@ body: |
; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 -16, implicit $exec, implicit-def $vgpr1_vgpr2
$vgpr1_vgpr2 = AV_MOV_B64_IMM_PSEUDO 18446744004990074889, implicit $exec
...
+
+---
+name: av_mov_b64_misalign_vgpr
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: av_mov_b64_misalign_vgpr
+ ; CHECK: $vgpr5 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr5_vgpr6
+ ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr5_vgpr6
+ $vgpr5_vgpr6 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+...
+
+---
+name: av_mov_b64_misalign_agpr
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: av_mov_b64_misalign_agpr
+ ; CHECK: $agpr5 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec, implicit-def $agpr5_agpr6
+ ; CHECK-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec, implicit-def $agpr5_agpr6
+ $agpr5_agpr6 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+...
diff --git a/llvm/test/CodeGen/AMDGPU/misaligned-vgpr-regsequence.mir b/llvm/test/CodeGen/AMDGPU/misaligned-vgpr-regsequence.mir
new file mode 100644
index 0000000000000..26a6cc41ad8fa
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/misaligned-vgpr-regsequence.mir
@@ -0,0 +1,30 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-after=si-load-store-opt %s -o - | FileCheck %s
+
+# CHECK: misaligned_regsequence:
+# CHECK: ; %bb.0:
+# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+# CHECK: s_load_dwordx2 s[0:1], s[4:5], 0x0
+# CHECK: v_mov_b32_e32 v5, 0
+# CHECK: v_mov_b32_e32 v4, 0
+# CHECK: v_mov_b32_e32 v6, 0
+# CHECK: s_waitcnt lgkmcnt(0)
+# CHECK: v_mov_b64_e32 v[2:3], s[0:1]
+# CHECK: flat_store_dwordx3 v[2:3], v[4:6]
+# CHECK: s_endpgm
+
+---
+name: misaligned_regsequence
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr4_sgpr5
+
+ %0:sgpr_64 = COPY $sgpr4_sgpr5
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %3:vreg_64_align2 = COPY %1
+ %4:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
+ %5:vreg_96_align2 = REG_SEQUENCE killed %2, %subreg.sub0, killed %4, %subreg.sub1_sub2
+ FLAT_STORE_DWORDX3 %3, killed %5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96), align 4)
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
index 21455a9f5074f..176d7752133cf 100644
--- a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
@@ -425,7 +425,7 @@ body: |
bb.0:
; GCN-LABEL: name: fold_v_mov_b64_64_to_unaligned
; GCN: [[V_MOV_B64_e32_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec
- ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_MOV_B]]
%0:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec
%1:vreg_64 = COPY killed %0
@@ -437,7 +437,8 @@ name: fold_v_mov_b64_pseudo_64_to_unaligned
body: |
bb.0:
; GCN-LABEL: name: fold_v_mov_b64_pseudo_64_to_unaligned
- ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+ ; GCN: [[V_MOV_B64_PSEUDO:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_MOV_B]]
%0:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
%1:vreg_64 = COPY killed %0
diff --git a/llvm/test/CodeGen/AMDGPU/siloadstoreopt-misaligned-regsequence.ll b/llvm/test/CodeGen/AMDGPU/siloadstoreopt-misaligned-regsequence.ll
new file mode 100644
index 0000000000000..e95aba71775b5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/siloadstoreopt-misaligned-regsequence.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck %s
+
+define amdgpu_kernel void @foo(ptr %0) {
+; CHECK-LABEL: foo:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: v_mov_b32_e32 v3, v2
+; CHECK-NEXT: v_mov_b32_e32 v4, v3
+; CHECK-NEXT: v_mov_b32_e32 v3, v2
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[2:4]
+; CHECK-NEXT: s_endpgm
+entry:
+ %1 = getelementptr inbounds i8, ptr %0, i64 4
+ store i32 0, ptr %0, align 4
+ store i64 0, ptr %1, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir b/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir
index 70e2987454192..4c68c4519302a 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir
+++ b/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir
@@ -93,3 +93,12 @@ body: |
bb.0:
$vgpr0_vgpr1 = V_MOV_B64_PSEUDO 4575657222473777152, implicit $exec
...
+
+# GCN-LABEL: name: v_mov_b64_misalign
+# GCN: $vgpr5 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr5_vgpr6
+# GCN: $vgpr6 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr5_vgpr6
+name: v_mov_b64_misalign
+body: |
+ bb.0:
+ $vgpr5_vgpr6 = V_MOV_B64_PSEUDO 0, implicit $exec
+...
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-convergent.mir b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-convergent.mir
index df3e780c61f46..955cf0dbe38d4 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-convergent.mir
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-convergent.mir
@@ -1,11 +1,16 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+
+# machine-sink must not sink WMMA* instructions.
+# Ensure that WMMA instructions are marked as convergent to prevent
+# machine-sink from sinking them.
+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx12-generic -run-pass=machine-sink %s -o - | FileCheck %s
---
-name: wmma_test
+name: wmma_test_WMMA_F32_16X16X16_F16_w32_threeaddr
tracksRegLiveness: true
body: |
- ; CHECK-LABEL: name: wmma_test
+ ; CHECK-LABEL: name: wmma_test_WMMA_F32_16X16X16_F16_w32_threeaddr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: {{ $}}
@@ -40,3 +45,147 @@ body: |
S_ENDPGM 0
...
+
+---
+name: wmma_test_V_WMMA_F32_16X16X16_F16_twoaddr_w32
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: wmma_test_V_WMMA_F32_16X16X16_F16_twoaddr_w32
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc:vreg_256 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: early-clobber %vdst:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc, 0, 0, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_32 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vcopy:vgpr_32 = COPY %vdst.sub0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: SI_END_CF %sdst, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc:vreg_256 = IMPLICIT_DEF
+ %ssrc:sreg_32 = IMPLICIT_DEF
+ early-clobber %vdst:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc, 0, 0, implicit $exec
+ %sdst:sreg_32 = SI_IF %ssrc:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+ bb.1:
+ %vcopy:vgpr_32 = COPY %vdst.sub0
+ bb.2:
+ SI_END_CF %sdst:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: wmma_test_V_WMMA_I32_16X16X16_IU8_twoaddr_w32
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: wmma_test_V_WMMA_I32_16X16X16_IU8_twoaddr_w32
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc2:vreg_256 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: early-clobber %vdst:vreg_256 = V_WMMA_I32_16X16X16_IU8_twoaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc2, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_32 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vcopy:vgpr_32 = COPY %vdst.sub0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: SI_END_CF %sdst, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc:vreg_128 = IMPLICIT_DEF
+ %vsrc2:vreg_256 = IMPLICIT_DEF
+ %ssrc:sreg_32 = IMPLICIT_DEF
+ early-clobber %vdst:vreg_256 = V_WMMA_I32_16X16X16_IU8_twoaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc2, 0, 0, 0, implicit $exec
+ %sdst:sreg_32 = SI_IF %ssrc:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+ bb.1:
+ %vcopy:vgpr_32 = COPY %vdst.sub0
+ bb.2:
+ SI_END_CF %sdst:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: wmma_test_V_WMMA_BF16_16X16X16_BF16_threeaddr_w32
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: wmma_test_V_WMMA_BF16_16X16X16_BF16_threeaddr_w32
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc:vreg_256 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: early-clobber %vdst:vreg_256 = V_WMMA_BF16_16X16X16_BF16_threeaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc, 0, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_32 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vcopy:vgpr_32 = COPY %vdst.sub0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: SI_END_CF %sdst, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc:vreg_256 = IMPLICIT_DEF
+ %ssrc:sreg_32 = IMPLICIT_DEF
+ early-clobber %vdst:vreg_256 = V_WMMA_BF16_16X16X16_BF16_threeaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc, 0, 0, 0, 0, implicit $exec
+ %sdst:sreg_32 = SI_IF %ssrc:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+ bb.1:
+ %vcopy:vgpr_32 = COPY %vdst.sub0
+ bb.2:
+ SI_END_CF %sdst:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: wmma_test_V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: wmma_test_V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vsrc256:vreg_256 = IMPLICIT_DEF
+ ; CHECK-NEXT: %vsrc512:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: %ssrc:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: early-clobber %vdst:vreg_256 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr %vsrc512, %vsrc512, 8, %vsrc256, 0, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: %sdst:sreg_32 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %vcopy:vgpr_32 = COPY %vdst.sub0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: SI_END_CF %sdst, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %vsrc256:vreg_256 = IMPLICIT_DEF
+ %vsrc512:vreg_512 = IMPLICIT_DEF
+ %ssrc:sreg_32 = IMPLICIT_DEF
+ early-clobber %vdst:vreg_256 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr %vsrc512, %vsrc512, 8, %vsrc256, 0, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
+ %sdst:sreg_32 = SI_IF %ssrc:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+ bb.1:
+ %vcopy:vgpr_32 = COPY %vdst.sub0
+ bb.2:
+ SI_END_CF %sdst:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
index 6688379665924..7825d380d6e50 100644
--- a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
@@ -2041,3 +2041,652 @@ define nofpclass(nan) float @ret_nonan_fmul_select_nan_other_use_commute(i1 %con
%nan.user = fmul float %select, %y
ret float %nan.user
}
+
+define nofpclass(snan) float @copysign_src_known_positive__sign_known_negative_multiple_use(float nofpclass(nan ninf nnorm nsub nzero) %always.positive, float nofpclass(nan pinf pnorm psub pzero) %always.negative, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(snan) float @copysign_src_known_positive__sign_known_negative_multiple_use
+; CHECK-SAME: (float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE:%.*]], float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[ALWAYS_POSITIVE]]
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.positive, float %always.negative)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(snan) float @copysign_src_known_negative__sign_known_negative_multiple_use(float nofpclass(nan pinf pnorm psub pzero) %always.negative0, float nofpclass(nan pinf pnorm psub pzero) %always.negative1, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(snan) float @copysign_src_known_negative__sign_known_negative_multiple_use
+; CHECK-SAME: (float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE0:%.*]], float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE1:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[ALWAYS_NEGATIVE0]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]]
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[ALWAYS_NEGATIVE0]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative0, float %always.negative1)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(snan) float @copysign_src_known_positive__sign_known_positive_multiple_use(float nofpclass(nan ninf nnorm nsub nzero) %always.positive0, float nofpclass(nan ninf nnorm nsub nzero) %always.positive1, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(snan) float @copysign_src_known_positive__sign_known_positive_multiple_use
+; CHECK-SAME: (float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE0:%.*]], float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE1:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: store float [[ALWAYS_POSITIVE0]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[ALWAYS_POSITIVE0]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.positive0, float %always.positive1)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(nan) float @ret_nonan__copysign_src_known_negative_or_nan__sign_known_negative_multiple_use(float nofpclass(pinf pnorm psub pzero) %always.negative0.or.nan, float nofpclass(nan pinf pnorm psub pzero) %always.negative1, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan) float @ret_nonan__copysign_src_known_negative_or_nan__sign_known_negative_multiple_use
+; CHECK-SAME: (float nofpclass(pinf pzero psub pnorm) [[ALWAYS_NEGATIVE0_OR_NAN:%.*]], float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE1:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[ALWAYS_NEGATIVE0_OR_NAN]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]]
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[ALWAYS_NEGATIVE0_OR_NAN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative0.or.nan, float %always.negative1)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(nan ninf nnorm nsub nzero) float @ret_only_positive__copysign__sign_known_negative_multiple_use(float %unknown, float nofpclass(nan pinf pnorm psub pzero) %always.negative, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @ret_only_positive__copysign__sign_known_negative_multiple_use
+; CHECK-SAME: (float [[UNKNOWN:%.*]], float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[UNKNOWN]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]]
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float poison
+;
+ %copysign = call float @llvm.copysign.f32(float %unknown, float %always.negative)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(nan ninf nnorm nsub nzero) float @ret_only_positive__copysign__sign_known_positive_multiple_use(float %unknown, float nofpclass(nan ninf nnorm nsub nzero) %always.positive, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @ret_only_positive__copysign__sign_known_positive_multiple_use
+; CHECK-SAME: (float [[UNKNOWN:%.*]], float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %unknown, float %always.positive)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(nan pinf pnorm psub pzero) float @ret_only_negative__copysign__sign_known_negative_multiple_use(float %unknown, float nofpclass(nan pinf pnorm psub pzero) %always.negative, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan pinf pzero psub pnorm) float @ret_only_negative__copysign__sign_known_negative_multiple_use
+; CHECK-SAME: (float [[UNKNOWN:%.*]], float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[UNKNOWN]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]]
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %unknown, float %always.negative)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(nan pinf pnorm psub pzero) float @ret_only_negative__copysign__sign_known_positive_multiple_use(float %unknown, float nofpclass(nan ninf nnorm nsub nzero) %always.positive, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan pinf pzero psub pnorm) float @ret_only_negative__copysign__sign_known_positive_multiple_use
+; CHECK-SAME: (float [[UNKNOWN:%.*]], float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float poison
+;
+ %copysign = call float @llvm.copysign.f32(float %unknown, float %always.positive)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(nan ninf nnorm nsub nzero) float @ret_only_positive__copysign_multiple_use(float %unknown.mag, float %unknown.sign, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @ret_only_positive__copysign_multiple_use
+; CHECK-SAME: (float [[UNKNOWN_MAG:%.*]], float [[UNKNOWN_SIGN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[UNKNOWN_MAG]], float [[UNKNOWN_SIGN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %unknown.mag, float %unknown.sign)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(nan pinf pnorm psub pzero) float @ret_only_negative__copysign_multiple_use(float %unknown.mag, float %unknown.sign, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan pinf pzero psub pnorm) float @ret_only_negative__copysign_multiple_use
+; CHECK-SAME: (float [[UNKNOWN_MAG:%.*]], float [[UNKNOWN_SIGN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[UNKNOWN_MAG]], float [[UNKNOWN_SIGN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %unknown.mag, float %unknown.sign)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Fold to direct use of %always.positive
+define nofpclass(nan ninf nnorm nsub nzero) float @ret_only_positive__copysign_src_known_positive__sign_unknown(float nofpclass(nan ninf nnorm nsub nzero) %always.positive, float %unknown) {
+; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @ret_only_positive__copysign_src_known_positive__sign_unknown
+; CHECK-SAME: (float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: ret float [[ALWAYS_POSITIVE]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.positive, float %unknown)
+ ret float %copysign
+}
+
+; Fold to direct use of %always.positive
+define nofpclass(nan ninf nnorm nsub nzero) float @ret_only_positive__copysign_src_known_positive__sign_unknown_multiple_use(float nofpclass(nan ninf nnorm nsub nzero) %always.positive, float %unknown, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @ret_only_positive__copysign_src_known_positive__sign_unknown_multiple_use
+; CHECK-SAME: (float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE:%.*]], float [[UNKNOWN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_POSITIVE]], float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.positive, float %unknown)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Fold to direct use of %always.negative
+define nofpclass(nan pinf pnorm psub pzero) float @ret_only_negative__copysign_src_known_negative__sign_unknown(float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown) {
+; CHECK-LABEL: define nofpclass(nan pinf pzero psub pnorm) float @ret_only_negative__copysign_src_known_negative__sign_unknown
+; CHECK-SAME: (float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: ret float [[ALWAYS_NEGATIVE]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative, float %unknown)
+ ret float %copysign
+}
+
+; Fold to direct use of %always.negative
+define nofpclass(nan pinf pnorm psub pzero) float @ret_only_negative__copysign_src_known_negative__sign_unknown_multiple_use(float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan pinf pzero psub pnorm) float @ret_only_negative__copysign_src_known_negative__sign_unknown_multiple_use
+; CHECK-SAME: (float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE]], float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative, float %unknown)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Negative test
+define nofpclass(nan ninf nnorm nsub nzero) float @ret_only_positive__copysign_src_known_negative__sign_unknown(float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown) {
+; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @ret_only_positive__copysign_src_known_negative__sign_unknown
+; CHECK-SAME: (float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[ALWAYS_NEGATIVE]])
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative, float %unknown)
+ ret float %copysign
+}
+
+; Negative test
+define nofpclass(nan ninf nnorm nsub nzero) float @ret_only_positive__copysign_src_known_negative__sign_unknown_multiple_use(float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @ret_only_positive__copysign_src_known_negative__sign_unknown_multiple_use
+; CHECK-SAME: (float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE]], float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative, float %unknown)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Negative test
+define nofpclass(nan pinf pnorm psub pzero) float @ret_only_negative__copysign_src_known_positive__sign_unknown(float nofpclass(nan ninf nnorm nsub nzero) %always.positive, float %unknown) {
+; CHECK-LABEL: define nofpclass(nan pinf pzero psub pnorm) float @ret_only_negative__copysign_src_known_positive__sign_unknown
+; CHECK-SAME: (float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[ALWAYS_POSITIVE]]
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.positive, float %unknown)
+ ret float %copysign
+}
+
+; Could still be positive for +inf input
+define nofpclass(nan nnorm nsub nzero) float @ret_only_positive_or_ninf__copysign_src_known_positive__sign_unknown(float nofpclass(nan ninf nnorm nsub nzero) %always.positive, float %unknown) {
+; CHECK-LABEL: define nofpclass(nan nzero nsub nnorm) float @ret_only_positive_or_ninf__copysign_src_known_positive__sign_unknown
+; CHECK-SAME: (float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_POSITIVE]], float [[UNKNOWN]])
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.positive, float %unknown)
+ ret float %copysign
+}
+
+; The ninf flag gives the missing no-infs
+define nofpclass(nan nnorm nsub nzero) float @ret_only_positive_or_ninf__copysign_ninf_src_known_positive__sign_unknown(float nofpclass(nan ninf nnorm nsub nzero) %always.positive, float %unknown) {
+; CHECK-LABEL: define nofpclass(nan nzero nsub nnorm) float @ret_only_positive_or_ninf__copysign_ninf_src_known_positive__sign_unknown
+; CHECK-SAME: (float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: ret float [[ALWAYS_POSITIVE]]
+;
+ %copysign = call ninf float @llvm.copysign.f32(float %always.positive, float %unknown)
+ ret float %copysign
+}
+
+; The ninf flag gives the missing no-infs
+define nofpclass(nan nnorm nsub nzero) float @ret_only_positive_or_ninf__copysign_ninf_src_known_positive__sign_unknown_multiple_use(float nofpclass(nan ninf nnorm nsub nzero) %always.positive, float %unknown, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan nzero nsub nnorm) float @ret_only_positive_or_ninf__copysign_ninf_src_known_positive__sign_unknown_multiple_use
+; CHECK-SAME: (float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE:%.*]], float [[UNKNOWN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call ninf float @llvm.copysign.f32(float [[ALWAYS_POSITIVE]], float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call ninf float @llvm.copysign.f32(float %always.positive, float %unknown)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; No pinf from argument
+define nofpclass(nan nnorm nsub nzero) float @ret_only_positive_or_ninf__copysign_src_known_positive_no_inf__sign_unknown(float nofpclass(nan inf nnorm nsub nzero) %always.positive.no.inf, float %unknown) {
+; CHECK-LABEL: define nofpclass(nan nzero nsub nnorm) float @ret_only_positive_or_ninf__copysign_src_known_positive_no_inf__sign_unknown
+; CHECK-SAME: (float nofpclass(nan inf nzero nsub nnorm) [[ALWAYS_POSITIVE_NO_INF:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_POSITIVE_NO_INF]], float [[UNKNOWN]])
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.positive.no.inf, float %unknown)
+ ret float %copysign
+}
+
+; No pinf from argument
+define nofpclass(nan nnorm nsub nzero) float @ret_only_positive_or_ninf__copysign_src_known_positive_no_inf__sign_unknown_multiple_use(float nofpclass(nan inf nnorm nsub nzero) %always.positive.no.inf, float %unknown, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan nzero nsub nnorm) float @ret_only_positive_or_ninf__copysign_src_known_positive_no_inf__sign_unknown_multiple_use
+; CHECK-SAME: (float nofpclass(nan inf nzero nsub nnorm) [[ALWAYS_POSITIVE_NO_INF:%.*]], float [[UNKNOWN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_POSITIVE_NO_INF]], float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.positive.no.inf, float %unknown)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(nan ninf nnorm nsub) float @ret_only_positive_or_zero__copysign_src_known_positive__sign_unknown(float nofpclass(nan ninf nnorm nsub) %always.positive.or.zero, float %unknown) {
+; CHECK-LABEL: define nofpclass(nan ninf nsub nnorm) float @ret_only_positive_or_zero__copysign_src_known_positive__sign_unknown
+; CHECK-SAME: (float nofpclass(nan ninf nsub nnorm) [[ALWAYS_POSITIVE_OR_ZERO:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_POSITIVE_OR_ZERO]], float [[UNKNOWN]])
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.positive.or.zero, float %unknown)
+ ret float %copysign
+}
+
+; Ignore 0 mismatch with nsz
+define nofpclass(nan ninf nnorm nsub) float @ret_only_positive_or_zero__copysign_nsz_src_known_positive__sign_unknown(float nofpclass(nan ninf nnorm nsub) %always.positive.or.zero, float %unknown) {
+; CHECK-LABEL: define nofpclass(nan ninf nsub nnorm) float @ret_only_positive_or_zero__copysign_nsz_src_known_positive__sign_unknown
+; CHECK-SAME: (float nofpclass(nan ninf nsub nnorm) [[ALWAYS_POSITIVE_OR_ZERO:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: ret float [[ALWAYS_POSITIVE_OR_ZERO]]
+;
+ %copysign = call nsz float @llvm.copysign.f32(float %always.positive.or.zero, float %unknown)
+ ret float %copysign
+}
+
+; Do not use nsz with multiple uses
+define nofpclass(nan ninf nnorm nsub) float @ret_only_positive_or_zero__copysign_nsz_src_known_positive__sign_unknown_multiple_use(float nofpclass(nan ninf nnorm nsub) %always.positive.or.zero, float %unknown, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan ninf nsub nnorm) float @ret_only_positive_or_zero__copysign_nsz_src_known_positive__sign_unknown_multiple_use
+; CHECK-SAME: (float nofpclass(nan ninf nsub nnorm) [[ALWAYS_POSITIVE_OR_ZERO:%.*]], float [[UNKNOWN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call nsz float @llvm.copysign.f32(float [[ALWAYS_POSITIVE_OR_ZERO]], float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call nsz float @llvm.copysign.f32(float %always.positive.or.zero, float %unknown)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(nan pinf pnorm psub) float @ret_only_negative_or_pzero__copysign_src_known_negative__sign_unknown(float nofpclass(nan pinf pnorm psub) %always.negative.or.zero, float %unknown) {
+; CHECK-LABEL: define nofpclass(nan pinf psub pnorm) float @ret_only_negative_or_pzero__copysign_src_known_negative__sign_unknown
+; CHECK-SAME: (float nofpclass(nan pinf psub pnorm) [[ALWAYS_NEGATIVE_OR_ZERO:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE_OR_ZERO]], float [[UNKNOWN]])
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative.or.zero, float %unknown)
+ ret float %copysign
+}
+
+; Ignore 0 mismatch with nsz
+define nofpclass(nan pinf pnorm psub) float @ret_only_negative_or_pzero__copysign_nsz_src_known_negative__sign_unknown(float nofpclass(nan pinf pnorm psub) %always.negative.or.zero, float %unknown) {
+; CHECK-LABEL: define nofpclass(nan pinf psub pnorm) float @ret_only_negative_or_pzero__copysign_nsz_src_known_negative__sign_unknown
+; CHECK-SAME: (float nofpclass(nan pinf psub pnorm) [[ALWAYS_NEGATIVE_OR_ZERO:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: ret float [[ALWAYS_NEGATIVE_OR_ZERO]]
+;
+ %copysign = call nsz float @llvm.copysign.f32(float %always.negative.or.zero, float %unknown)
+ ret float %copysign
+}
+
+; Do not use nsz with multiple uses
+define nofpclass(nan pinf pnorm psub) float @ret_only_negative_or_pzero__copysign_nsz_src_known_negative__sign_unknown_multiple_use(float nofpclass(nan pinf pnorm psub) %always.negative.or.zero, float %unknown, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan pinf psub pnorm) float @ret_only_negative_or_pzero__copysign_nsz_src_known_negative__sign_unknown_multiple_use
+; CHECK-SAME: (float nofpclass(nan pinf psub pnorm) [[ALWAYS_NEGATIVE_OR_ZERO:%.*]], float [[UNKNOWN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call nsz float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE_OR_ZERO]], float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call nsz float @llvm.copysign.f32(float %always.negative.or.zero, float %unknown)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(pinf pnorm psub pzero) float @ret_only_negative_or_nan__copysign_src_known_negative__sign_unknown(float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown) {
+; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_only_negative_or_nan__copysign_src_known_negative__sign_unknown
+; CHECK-SAME: (float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE]], float [[UNKNOWN]])
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative, float %unknown)
+ ret float %copysign
+}
+
+
+define nofpclass(pinf pnorm psub pzero) float @ret_only_negative_or_nan__copysign_src_known_negative__sign_unknown_multiple_use(float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_only_negative_or_nan__copysign_src_known_negative__sign_unknown_multiple_use
+; CHECK-SAME: (float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE]], float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative, float %unknown)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(pinf pnorm psub pzero) float @ret_only_negative_or_nan__copysign_src_known_negative_or_nan__sign_unknown(float nofpclass(pinf pnorm psub pzero) %always.negative.or.nan, float %unknown) {
+; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_only_negative_or_nan__copysign_src_known_negative_or_nan__sign_unknown
+; CHECK-SAME: (float nofpclass(pinf pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_NAN:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE_OR_NAN]], float [[UNKNOWN]])
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative.or.nan, float %unknown)
+ ret float %copysign
+}
+
+define nofpclass(pinf pnorm psub pzero) float @ret_only_negative_or_nan__copysign_src_known_negative_or_nan__sign_unknown_multiple_use(float nofpclass(pinf pnorm psub pzero) %always.negative.or.nan, float %unknown, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_only_negative_or_nan__copysign_src_known_negative_or_nan__sign_unknown_multiple_use
+; CHECK-SAME: (float nofpclass(pinf pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_NAN:%.*]], float [[UNKNOWN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE_OR_NAN]], float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative.or.nan, float %unknown)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Take nnan from flag
+define nofpclass(pinf pnorm psub pzero) float @ret_only_negative_or_nan__copysign_nnan_src_known_negative_or_nan__sign_unknown(float nofpclass(pinf pnorm psub pzero) %always.negative.or.nan, float %unknown) {
+; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_only_negative_or_nan__copysign_nnan_src_known_negative_or_nan__sign_unknown
+; CHECK-SAME: (float nofpclass(pinf pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_NAN:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: ret float [[ALWAYS_NEGATIVE_OR_NAN]]
+;
+ %copysign = call nnan float @llvm.copysign.f32(float %always.negative.or.nan, float %unknown)
+ ret float %copysign
+}
+
+; Take nnan from flag
+define nofpclass(pinf pnorm psub pzero) float @ret_only_negative_or_nan__copysign_nnan_src_known_negative_or_nan__sign_unknown_multiple_use(float nofpclass(pinf pnorm psub pzero) %always.negative.or.nan, float %unknown, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_only_negative_or_nan__copysign_nnan_src_known_negative_or_nan__sign_unknown_multiple_use
+; CHECK-SAME: (float nofpclass(pinf pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_NAN:%.*]], float [[UNKNOWN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call nnan float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE_OR_NAN]], float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call nnan float @llvm.copysign.f32(float %always.negative.or.nan, float %unknown)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+
+define nofpclass(pinf pnorm psub pzero) float @ret_only_positive_or_nan__copysign_src_known_positive__sign_unknown(float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown) {
+; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_only_positive_or_nan__copysign_src_known_positive__sign_unknown
+; CHECK-SAME: (float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE]], float [[UNKNOWN]])
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative, float %unknown)
+ ret float %copysign
+}
+
+
+define nofpclass(pinf pnorm psub pzero) float @ret_only_positive_or_nan__copysign_src_known_positive__sign_unknown_multiple_use(float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_only_positive_or_nan__copysign_src_known_positive__sign_unknown_multiple_use
+; CHECK-SAME: (float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE]], float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative, float %unknown)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+define nofpclass(pinf pnorm psub pzero) float @ret_only_positive_or_nan__copysign_src_known_positive_or_nan__sign_unknown(float nofpclass(pinf pnorm psub pzero) %always.negative.or.nan, float %unknown) {
+; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_only_positive_or_nan__copysign_src_known_positive_or_nan__sign_unknown
+; CHECK-SAME: (float nofpclass(pinf pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_NAN:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE_OR_NAN]], float [[UNKNOWN]])
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative.or.nan, float %unknown)
+ ret float %copysign
+}
+
+define nofpclass(pinf pnorm psub pzero) float @ret_only_positive_or_nan__copysign_src_known_positive_or_nan__sign_unknown_multiple_use(float nofpclass(pinf pnorm psub pzero) %always.negative.or.nan, float %unknown, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_only_positive_or_nan__copysign_src_known_positive_or_nan__sign_unknown_multiple_use
+; CHECK-SAME: (float nofpclass(pinf pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_NAN:%.*]], float [[UNKNOWN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE_OR_NAN]], float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative.or.nan, float %unknown)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Take nnan from flag
+define nofpclass(pinf pnorm psub pzero) float @ret_only_positive_or_nan__copysign_nnan_src_known_positive_or_nan__sign_unknown(float nofpclass(pinf pnorm psub pzero) %always.negative.or.nan, float %unknown) {
+; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_only_positive_or_nan__copysign_nnan_src_known_positive_or_nan__sign_unknown
+; CHECK-SAME: (float nofpclass(pinf pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_NAN:%.*]], float [[UNKNOWN:%.*]]) {
+; CHECK-NEXT: ret float [[ALWAYS_NEGATIVE_OR_NAN]]
+;
+ %copysign = call nnan float @llvm.copysign.f32(float %always.negative.or.nan, float %unknown)
+ ret float %copysign
+}
+
+; Take nnan from flag
+define nofpclass(pinf pnorm psub pzero) float @ret_only_positive_or_nan__copysign_nnan_src_known_positive_or_nan__sign_unknown_multiple_use(float nofpclass(pinf pnorm psub pzero) %always.negative.or.nan, float %unknown, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_only_positive_or_nan__copysign_nnan_src_known_positive_or_nan__sign_unknown_multiple_use
+; CHECK-SAME: (float nofpclass(pinf pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_NAN:%.*]], float [[UNKNOWN:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call nnan float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE_OR_NAN]], float [[UNKNOWN]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call nnan float @llvm.copysign.f32(float %always.negative.or.nan, float %unknown)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; We can only tell the sign bit is negative due to the ninf flag
+define nofpclass(nan) float @ret_no_nan__copysign_ninf__src_known_negative_or_nan__sign_known_negative_or_pinf(float nofpclass(pinf pnorm psub pzero) %always.negative.or.nan, float nofpclass(nan pnorm psub pzero) %always.negative.or.pinf) {
+; CHECK-LABEL: define nofpclass(nan) float @ret_no_nan__copysign_ninf__src_known_negative_or_nan__sign_known_negative_or_pinf
+; CHECK-SAME: (float nofpclass(pinf pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_NAN:%.*]], float nofpclass(nan pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_PINF:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call ninf float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE_OR_NAN]], float [[ALWAYS_NEGATIVE_OR_PINF]])
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call ninf float @llvm.copysign.f32(float %always.negative.or.nan, float %always.negative.or.pinf)
+ ret float %copysign
+}
+
+; We can only tell the sign bit is negative due to the ninf flag
+define nofpclass(nan) float @ret_no_nan__copysign_ninf__src_known_negative_or_nan__sign_known_negative_or_pinf__multiple_use(float nofpclass(pinf pnorm psub pzero) %always.negative.or.nan, float nofpclass(nan pnorm psub pzero) %always.negative.or.pinf, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan) float @ret_no_nan__copysign_ninf__src_known_negative_or_nan__sign_known_negative_or_pinf__multiple_use
+; CHECK-SAME: (float nofpclass(pinf pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_NAN:%.*]], float nofpclass(nan pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_PINF:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call ninf float @llvm.copysign.f32(float [[ALWAYS_NEGATIVE_OR_NAN]], float [[ALWAYS_NEGATIVE_OR_PINF]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call ninf float @llvm.copysign.f32(float %always.negative.or.nan, float %always.negative.or.pinf)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; We can only tell the sign bit is positive due to the ninf flag
+define nofpclass(nan) float @ret_no_nan__copysign_ninf__src_known_negative_or_nan__sign_known_positive_or_ninf(float nofpclass(ninf nnorm nsub nzero) %always.positive.or.nan, float nofpclass(nan nnorm nsub nzero) %always.negative.or.pinf) {
+; CHECK-LABEL: define nofpclass(nan) float @ret_no_nan__copysign_ninf__src_known_negative_or_nan__sign_known_positive_or_ninf
+; CHECK-SAME: (float nofpclass(ninf nzero nsub nnorm) [[ALWAYS_POSITIVE_OR_NAN:%.*]], float nofpclass(nan nzero nsub nnorm) [[ALWAYS_NEGATIVE_OR_PINF:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call ninf float @llvm.copysign.f32(float [[ALWAYS_POSITIVE_OR_NAN]], float [[ALWAYS_NEGATIVE_OR_PINF]])
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call ninf float @llvm.copysign.f32(float %always.positive.or.nan, float %always.negative.or.pinf)
+ ret float %copysign
+}
+
+; We can only tell the sign bit is positive due to the ninf flag
+define nofpclass(nan) float @ret_no_nan__copysign_ninf__src_known_negative_or_nan__sign_known_positive_or_ninf__multiple_use(float nofpclass(ninf nnorm nsub nzero) %always.positive.or.nan, float nofpclass(nan nnorm nsub nzero) %always.negative.or.pinf, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan) float @ret_no_nan__copysign_ninf__src_known_negative_or_nan__sign_known_positive_or_ninf__multiple_use
+; CHECK-SAME: (float nofpclass(ninf nzero nsub nnorm) [[ALWAYS_POSITIVE_OR_NAN:%.*]], float nofpclass(nan nzero nsub nnorm) [[ALWAYS_NEGATIVE_OR_PINF:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call ninf float @llvm.copysign.f32(float [[ALWAYS_POSITIVE_OR_NAN]], float [[ALWAYS_NEGATIVE_OR_PINF]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call ninf float @llvm.copysign.f32(float %always.positive.or.nan, float %always.negative.or.pinf)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Can infer sign bit is negative due to no-inf return
+define nofpclass(nan inf) float @ret_no_nan_no_inf__copysign__src_known_negative_or_nan__sign_known_negative_or_pinf__multiple_use(float nofpclass(nan pnorm psub pzero) %always.negative.or.pinf, float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown0, float %unknown1, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan inf) float @ret_no_nan_no_inf__copysign__src_known_negative_or_nan__sign_known_negative_or_pinf__multiple_use
+; CHECK-SAME: (float nofpclass(nan pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_PINF:%.*]], float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN0:%.*]], float [[UNKNOWN1:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[ALWAYS_NEGATIVE_OR_PINF]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]]
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[ALWAYS_NEGATIVE_OR_PINF]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative.or.pinf, float %always.negative)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; No-pinf result not sufficient
+define nofpclass(nan pinf) float @ret_no_nan_no_pinf__copysign__src_known_negative_or_nan__sign_known_negative_or_pinf__multiple_use(float nofpclass(nan pnorm psub pzero) %always.negative.or.pinf, float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown0, float %unknown1, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan pinf) float @ret_no_nan_no_pinf__copysign__src_known_negative_or_nan__sign_known_negative_or_pinf__multiple_use
+; CHECK-SAME: (float nofpclass(nan pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_PINF:%.*]], float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN0:%.*]], float [[UNKNOWN1:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[ALWAYS_NEGATIVE_OR_PINF]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]]
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative.or.pinf, float %always.negative)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; No-ninf result not sufficient
+define nofpclass(nan ninf) float @ret_no_nan_no_ninf__copysign__src_known_negative_or_nan__sign_known_negative_or_pinf__multiple_use(float nofpclass(nan pnorm psub pzero) %always.negative.or.pinf, float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown0, float %unknown1, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan ninf) float @ret_no_nan_no_ninf__copysign__src_known_negative_or_nan__sign_known_negative_or_pinf__multiple_use
+; CHECK-SAME: (float nofpclass(nan pzero psub pnorm) [[ALWAYS_NEGATIVE_OR_PINF:%.*]], float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN0:%.*]], float [[UNKNOWN1:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[ALWAYS_NEGATIVE_OR_PINF]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]]
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative.or.pinf, float %always.negative)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Can infer sign bit is negative due to no-norm return
+define nofpclass(nan norm) float @ret_no_nan_no_sub__copysign__src_known_negative_or_nan__sign_known_negative_or_pnorm__multiple_use(float nofpclass(nan pinf psub pzero) %always.negative.or.pnorm, float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown0, float %unknown1, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan norm) float @ret_no_nan_no_sub__copysign__src_known_negative_or_nan__sign_known_negative_or_pnorm__multiple_use
+; CHECK-SAME: (float nofpclass(nan pinf pzero psub) [[ALWAYS_NEGATIVE_OR_PNORM:%.*]], float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN0:%.*]], float [[UNKNOWN1:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[ALWAYS_NEGATIVE_OR_PNORM]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]]
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative.or.pnorm, float %always.negative)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Can infer sign bit is negative due to no-sub return
+define nofpclass(nan sub) float @ret_no_nan_no_sub__copysign__src_known_negative_or_nan__sign_known_negative_or_psub__multiple_use(float nofpclass(nan pinf pnorm pzero) %always.negative.or.psub, float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown0, float %unknown1, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan sub) float @ret_no_nan_no_sub__copysign__src_known_negative_or_nan__sign_known_negative_or_psub__multiple_use
+; CHECK-SAME: (float nofpclass(nan pinf pzero pnorm) [[ALWAYS_NEGATIVE_OR_PSUB:%.*]], float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN0:%.*]], float [[UNKNOWN1:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[ALWAYS_NEGATIVE_OR_PSUB]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]]
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative.or.psub, float %always.negative)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Can infer sign bit is negative due to no-zero return
+define nofpclass(nan zero) float @ret_no_nan_no_zero__copysign__src_known_negative_or_nan__sign_known_negative_or_pzero__multiple_use(float nofpclass(nan pinf pnorm psub) %always.negative.or.pzero, float nofpclass(nan pinf pnorm psub pzero) %always.negative, float %unknown0, float %unknown1, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan zero) float @ret_no_nan_no_zero__copysign__src_known_negative_or_nan__sign_known_negative_or_pzero__multiple_use
+; CHECK-SAME: (float nofpclass(nan pinf psub pnorm) [[ALWAYS_NEGATIVE_OR_PZERO:%.*]], float nofpclass(nan pinf pzero psub pnorm) [[ALWAYS_NEGATIVE:%.*]], float [[UNKNOWN0:%.*]], float [[UNKNOWN1:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[ALWAYS_NEGATIVE_OR_PZERO]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]]
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.negative.or.pzero, float %always.negative)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Can infer sign bit is positive due to no-inf return
+define nofpclass(nan inf) float @ret_no_nan_no_inf__copysign__src_known_positive_or_nan__sign_known_positive_or_ninf__multiple_use(float nofpclass(nan nnorm nsub nzero) %always.positive.or.ninf, float nofpclass(nan ninf nnorm nsub nzero) %always.positive, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan inf) float @ret_no_nan_no_inf__copysign__src_known_positive_or_nan__sign_known_positive_or_ninf__multiple_use
+; CHECK-SAME: (float nofpclass(nan nzero nsub nnorm) [[ALWAYS_POSITIVE_OR_NINF:%.*]], float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[ALWAYS_POSITIVE_OR_NINF]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[ALWAYS_POSITIVE_OR_NINF]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.positive.or.ninf, float %always.positive)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Can infer sign bit is positive due to no-norm return
+define nofpclass(nan norm) float @ret_no_nan_no_norm__copysign__src_known_positive_or_nan__sign_known_positive_or_norm__multiple_use(float nofpclass(nan ninf nsub nzero) %always.positive.or.nnorm, float nofpclass(nan ninf nnorm nsub nzero) %always.positive, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan norm) float @ret_no_nan_no_norm__copysign__src_known_positive_or_nan__sign_known_positive_or_norm__multiple_use
+; CHECK-SAME: (float nofpclass(nan ninf nzero nsub) [[ALWAYS_POSITIVE_OR_NNORM:%.*]], float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[ALWAYS_POSITIVE_OR_NNORM]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.positive.or.nnorm, float %always.positive)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Can infer sign bit is positive due to no-sub return
+define nofpclass(nan sub) float @ret_no_nan_no_sub__copysign__src_known_positive_or_nan__sign_known_positive_or_sub__multiple_use(float nofpclass(nan ninf nnorm nzero) %always.positive.or.nsub, float nofpclass(nan ninf nnorm nsub nzero) %always.positive, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan sub) float @ret_no_nan_no_sub__copysign__src_known_positive_or_nan__sign_known_positive_or_sub__multiple_use
+; CHECK-SAME: (float nofpclass(nan ninf nzero nnorm) [[ALWAYS_POSITIVE_OR_NSUB:%.*]], float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[ALWAYS_POSITIVE_OR_NSUB]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.positive.or.nsub, float %always.positive)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
+
+; Can infer sign bit is positive due to no-zero return
+define nofpclass(nan zero) float @ret_no_nan_no_zero__copysign__src_known_positive_or_nan__sign_known_positive_or_zero__multiple_use(float nofpclass(nan ninf nnorm nsub) %always.positive.or.nzero, float nofpclass(nan ninf nnorm nsub nzero) %always.positive, ptr %ptr) {
+; CHECK-LABEL: define nofpclass(nan zero) float @ret_no_nan_no_zero__copysign__src_known_positive_or_nan__sign_known_positive_or_zero__multiple_use
+; CHECK-SAME: (float nofpclass(nan ninf nsub nnorm) [[ALWAYS_POSITIVE_OR_NZERO:%.*]], float nofpclass(nan ninf nzero nsub nnorm) [[ALWAYS_POSITIVE:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[ALWAYS_POSITIVE_OR_NZERO]])
+; CHECK-NEXT: store float [[COPYSIGN]], ptr [[PTR]], align 4
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %copysign = call float @llvm.copysign.f32(float %always.positive.or.nzero, float %always.positive)
+ store float %copysign, ptr %ptr
+ ret float %copysign
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
index f90b26013fcbc..f1dee958fa09c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 4
-; RUN: opt --mattr=+neon,+dotprod -passes=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-NEON
-; RUN: opt --mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-SVE
-; RUN: opt --mattr=+sve -vectorizer-maximize-bandwidth -passes=loop-vectorize -force-vector-width=8 -scalable-vectorization=preferred -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-SVE-MAXBW
+; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -mattr=+neon,+dotprod -S < %s | FileCheck %s --check-prefixes=CHECK-NEON
+; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -mattr=+sve -S < %s | FileCheck %s --check-prefixes=CHECK-SVE
+; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -mattr=+sve \
+; RUN: -vectorizer-maximize-bandwidth -force-vector-width=8 -S < %s | FileCheck %s --check-prefixes=CHECK-SVE-MAXBW
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-none-unknown-elf"
@@ -158,7 +159,7 @@ for.body: ; preds = %for.body.preheader,
%sub = sub i32 %add, %mul.ac
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
}
define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
@@ -309,7 +310,7 @@ for.body: ; preds = %for.body.preheader,
%add.2 = add i32 %add, %mul.ac
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
}
define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
@@ -465,7 +466,7 @@ for.body: ; preds = %for.body.preheader,
%add = add i32 %sub, %mul.ac
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
}
define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
@@ -625,7 +626,7 @@ for.body: ; preds = %for.body.preheader,
%sub.2 = sub i32 %sub, %mul.ac
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
}
define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
@@ -787,7 +788,7 @@ for.body: ; preds = %for.body.preheader,
%sub.2 = add i32 %add, %mul.bc
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
}
define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
@@ -955,7 +956,7 @@ for.body: ; preds = %for.body.preheader,
%sub.2 = sub i32 %add, %mul.bc
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
}
@@ -1103,7 +1104,7 @@ for.body: ; preds = %for.body.preheader,
%add2 = add i32 %add, %c.ext
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
}
@@ -1235,7 +1236,7 @@ for.body: ; preds = %for.body.preheader,
%add2 = add i32 %add, %b.ext
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
}
@@ -1384,11 +1385,11 @@ for.body: ; preds = %for.body.preheader,
%add2 = add i32 %add, %mul.ab
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
}
attributes #0 = { vscale_range(1,16) }
-
-!0 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
-!1 = distinct !{!0}
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
+!2 = !{!"llvm.loop.interleave.count", i32 1}
More information about the libcxx-commits
mailing list