[llvm] [AMDGPU] Remove widen-16-bit-ops from CGP (PR #145483)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 24 02:40:31 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-backend-amdgpu
Author: Pierre van Houtryve (Pierre-vh)
Patch is 131.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145483.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp (-294)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll (+4-4)
- (removed) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll (-2853)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 22b921fb2084f..5f1983791cfae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -45,12 +45,6 @@ static cl::opt<bool> WidenLoads(
cl::ReallyHidden,
cl::init(false));
-static cl::opt<bool> Widen16BitOps(
- "amdgpu-codegenprepare-widen-16-bit-ops",
- cl::desc(
- "Widen uniform 16-bit instructions to 32-bit in AMDGPUCodeGenPrepare"),
- cl::ReallyHidden, cl::init(false));
-
static cl::opt<bool>
BreakLargePHIs("amdgpu-codegenprepare-break-large-phis",
cl::desc("Break large PHI nodes for DAGISel"),
@@ -150,18 +144,6 @@ class AMDGPUCodeGenPrepareImpl
bool canBreakPHINode(const PHINode &I);
- /// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
- /// binary operation \p V.
- ///
- /// \returns Binary operation \p V.
- /// \returns \p T's base element bit width.
- unsigned getBaseElementBitWidth(const Type *T) const;
-
- /// \returns Equivalent 32 bit integer type for given type \p T. For example,
- /// if \p T is i7, then i32 is returned; if \p T is <3 x i12>, then <3 x i32>
- /// is returned.
- Type *getI32Ty(IRBuilder<> &B, const Type *T) const;
-
/// \returns True if binary operation \p I is a signed binary operation, false
/// otherwise.
bool isSigned(const BinaryOperator &I) const;
@@ -170,10 +152,6 @@ class AMDGPUCodeGenPrepareImpl
/// signed 'icmp' operation, false otherwise.
bool isSigned(const SelectInst &I) const;
- /// \returns True if type \p T needs to be promoted to 32 bit integer type,
- /// false otherwise.
- bool needsPromotionToI32(const Type *T) const;
-
/// Return true if \p T is a legal scalar floating point type.
bool isLegalFloatingTy(const Type *T) const;
@@ -188,52 +166,6 @@ class AMDGPUCodeGenPrepareImpl
computeKnownFPClass(V, fcSubnormal, CtxI).isKnownNeverSubnormal();
}
- /// Promotes uniform binary operation \p I to equivalent 32 bit binary
- /// operation.
- ///
- /// \details \p I's base element bit width must be greater than 1 and less
- /// than or equal 16. Promotion is done by sign or zero extending operands to
- /// 32 bits, replacing \p I with equivalent 32 bit binary operation, and
- /// truncating the result of 32 bit binary operation back to \p I's original
- /// type. Division operation is not promoted.
- ///
- /// \returns True if \p I is promoted to equivalent 32 bit binary operation,
- /// false otherwise.
- bool promoteUniformOpToI32(BinaryOperator &I) const;
-
- /// Promotes uniform 'icmp' operation \p I to 32 bit 'icmp' operation.
- ///
- /// \details \p I's base element bit width must be greater than 1 and less
- /// than or equal 16. Promotion is done by sign or zero extending operands to
- /// 32 bits, and replacing \p I with 32 bit 'icmp' operation.
- ///
- /// \returns True.
- bool promoteUniformOpToI32(ICmpInst &I) const;
-
- /// Promotes uniform 'select' operation \p I to 32 bit 'select'
- /// operation.
- ///
- /// \details \p I's base element bit width must be greater than 1 and less
- /// than or equal 16. Promotion is done by sign or zero extending operands to
- /// 32 bits, replacing \p I with 32 bit 'select' operation, and truncating the
- /// result of 32 bit 'select' operation back to \p I's original type.
- ///
- /// \returns True.
- bool promoteUniformOpToI32(SelectInst &I) const;
-
- /// Promotes uniform 'bitreverse' intrinsic \p I to 32 bit 'bitreverse'
- /// intrinsic.
- ///
- /// \details \p I's base element bit width must be greater than 1 and less
- /// than or equal 16. Promotion is done by zero extending the operand to 32
- /// bits, replacing \p I with 32 bit 'bitreverse' intrinsic, shifting the
- /// result of 32 bit 'bitreverse' intrinsic to the right with zero fill (the
- /// shift amount is 32 minus \p I's base element bit width), and truncating
- /// the result of the shift operation back to \p I's original type.
- ///
- /// \returns True.
- bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;
-
/// \returns The minimum number of bits needed to store the value of \Op as an
/// unsigned integer. Truncating to this size and then zero-extending to
/// the original will not change the value.
@@ -320,13 +252,11 @@ class AMDGPUCodeGenPrepareImpl
bool visitInstruction(Instruction &I) { return false; }
bool visitBinaryOperator(BinaryOperator &I);
bool visitLoadInst(LoadInst &I);
- bool visitICmpInst(ICmpInst &I);
bool visitSelectInst(SelectInst &I);
bool visitPHINode(PHINode &I);
bool visitAddrSpaceCastInst(AddrSpaceCastInst &I);
bool visitIntrinsicInst(IntrinsicInst &I);
- bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
bool visitFMinLike(IntrinsicInst &I);
bool visitSqrt(IntrinsicInst &I);
bool run();
@@ -380,22 +310,6 @@ bool AMDGPUCodeGenPrepareImpl::run() {
return MadeChange;
}
-unsigned AMDGPUCodeGenPrepareImpl::getBaseElementBitWidth(const Type *T) const {
- assert(needsPromotionToI32(T) && "T does not need promotion to i32");
-
- if (T->isIntegerTy())
- return T->getIntegerBitWidth();
- return cast<VectorType>(T)->getElementType()->getIntegerBitWidth();
-}
-
-Type *AMDGPUCodeGenPrepareImpl::getI32Ty(IRBuilder<> &B, const Type *T) const {
- assert(needsPromotionToI32(T) && "T does not need promotion to i32");
-
- if (T->isIntegerTy())
- return B.getInt32Ty();
- return FixedVectorType::get(B.getInt32Ty(), cast<FixedVectorType>(T));
-}
-
bool AMDGPUCodeGenPrepareImpl::isSigned(const BinaryOperator &I) const {
return I.getOpcode() == Instruction::AShr ||
I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::SRem;
@@ -406,59 +320,11 @@ bool AMDGPUCodeGenPrepareImpl::isSigned(const SelectInst &I) const {
cast<ICmpInst>(I.getOperand(0))->isSigned();
}
-bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const {
- if (!Widen16BitOps)
- return false;
-
- const IntegerType *IntTy = dyn_cast<IntegerType>(T);
- if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
- return true;
-
- if (const VectorType *VT = dyn_cast<VectorType>(T)) {
- // TODO: The set of packed operations is more limited, so may want to
- // promote some anyway.
- if (ST.hasVOP3PInsts())
- return false;
-
- return needsPromotionToI32(VT->getElementType());
- }
-
- return false;
-}
-
bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const {
return Ty->isFloatTy() || Ty->isDoubleTy() ||
(Ty->isHalfTy() && ST.has16BitInsts());
}
-// Return true if the op promoted to i32 should have nsw set.
-static bool promotedOpIsNSW(const Instruction &I) {
- switch (I.getOpcode()) {
- case Instruction::Shl:
- case Instruction::Add:
- case Instruction::Sub:
- return true;
- case Instruction::Mul:
- return I.hasNoUnsignedWrap();
- default:
- return false;
- }
-}
-
-// Return true if the op promoted to i32 should have nuw set.
-static bool promotedOpIsNUW(const Instruction &I) {
- switch (I.getOpcode()) {
- case Instruction::Shl:
- case Instruction::Add:
- case Instruction::Mul:
- return true;
- case Instruction::Sub:
- return I.hasNoUnsignedWrap();
- default:
- return false;
- }
-}
-
bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
Type *Ty = I.getType();
int TySize = DL.getTypeSizeInBits(Ty);
@@ -467,134 +333,6 @@ bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
return I.isSimple() && TySize < 32 && Alignment >= 4 && UA.isUniform(&I);
}
-bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(BinaryOperator &I) const {
- assert(needsPromotionToI32(I.getType()) &&
- "I does not need promotion to i32");
-
- if (I.getOpcode() == Instruction::SDiv ||
- I.getOpcode() == Instruction::UDiv ||
- I.getOpcode() == Instruction::SRem ||
- I.getOpcode() == Instruction::URem)
- return false;
-
- IRBuilder<> Builder(&I);
- Builder.SetCurrentDebugLocation(I.getDebugLoc());
-
- Type *I32Ty = getI32Ty(Builder, I.getType());
- Value *ExtOp0 = nullptr;
- Value *ExtOp1 = nullptr;
- Value *ExtRes = nullptr;
- Value *TruncRes = nullptr;
-
- if (isSigned(I)) {
- ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
- ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
- } else {
- ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
- ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
- }
-
- ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
- if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
- if (promotedOpIsNSW(cast<Instruction>(I)))
- Inst->setHasNoSignedWrap();
-
- if (promotedOpIsNUW(cast<Instruction>(I)))
- Inst->setHasNoUnsignedWrap();
-
- if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
- Inst->setIsExact(ExactOp->isExact());
- }
-
- TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
-
- I.replaceAllUsesWith(TruncRes);
- I.eraseFromParent();
-
- return true;
-}
-
-bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(ICmpInst &I) const {
- assert(needsPromotionToI32(I.getOperand(0)->getType()) &&
- "I does not need promotion to i32");
-
- IRBuilder<> Builder(&I);
- Builder.SetCurrentDebugLocation(I.getDebugLoc());
-
- Type *I32Ty = getI32Ty(Builder, I.getOperand(0)->getType());
- Value *ExtOp0 = nullptr;
- Value *ExtOp1 = nullptr;
- Value *NewICmp = nullptr;
-
- if (I.isSigned()) {
- ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
- ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
- } else {
- ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
- ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
- }
- NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);
-
- I.replaceAllUsesWith(NewICmp);
- I.eraseFromParent();
-
- return true;
-}
-
-bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(SelectInst &I) const {
- assert(needsPromotionToI32(I.getType()) &&
- "I does not need promotion to i32");
-
- IRBuilder<> Builder(&I);
- Builder.SetCurrentDebugLocation(I.getDebugLoc());
-
- Type *I32Ty = getI32Ty(Builder, I.getType());
- Value *ExtOp1 = nullptr;
- Value *ExtOp2 = nullptr;
- Value *ExtRes = nullptr;
- Value *TruncRes = nullptr;
-
- if (isSigned(I)) {
- ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
- ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
- } else {
- ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
- ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
- }
- ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
- TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
-
- I.replaceAllUsesWith(TruncRes);
- I.eraseFromParent();
-
- return true;
-}
-
-bool AMDGPUCodeGenPrepareImpl::promoteUniformBitreverseToI32(
- IntrinsicInst &I) const {
- assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
- "I must be bitreverse intrinsic");
- assert(needsPromotionToI32(I.getType()) &&
- "I does not need promotion to i32");
-
- IRBuilder<> Builder(&I);
- Builder.SetCurrentDebugLocation(I.getDebugLoc());
-
- Type *I32Ty = getI32Ty(Builder, I.getType());
- Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
- Value *ExtRes =
- Builder.CreateIntrinsic(Intrinsic::bitreverse, {I32Ty}, {ExtOp});
- Value *LShrOp =
- Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType()));
- Value *TruncRes =
- Builder.CreateTrunc(LShrOp, I.getType());
-
- I.replaceAllUsesWith(TruncRes);
- I.eraseFromParent();
-
- return true;
-}
-
unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const {
return computeKnownBits(Op, DL, AC).countMaxActiveBits();
}
@@ -1635,10 +1373,6 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
if (foldBinOpIntoSelect(I))
return true;
- if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
- UA.isUniform(&I) && promoteUniformOpToI32(I))
- return true;
-
if (UseMul24Intrin && replaceMulWithMul24(I))
return true;
if (tryNarrowMathIfNoOverflow(&I, ST.getTargetLowering(),
@@ -1770,16 +1504,6 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
return false;
}
-bool AMDGPUCodeGenPrepareImpl::visitICmpInst(ICmpInst &I) {
- bool Changed = false;
-
- if (ST.has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
- UA.isUniform(&I))
- Changed |= promoteUniformOpToI32(I);
-
- return Changed;
-}
-
bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
Value *Cond = I.getCondition();
Value *TrueVal = I.getTrueValue();
@@ -1787,12 +1511,6 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
Value *CmpVal;
CmpPredicate Pred;
- if (ST.has16BitInsts() && needsPromotionToI32(I.getType())) {
- if (UA.isUniform(&I))
- return promoteUniformOpToI32(I);
- return false;
- }
-
// Match fract pattern with nan check.
if (!match(Cond, m_FCmp(Pred, m_Value(CmpVal), m_NonNaN())))
return false;
@@ -2196,8 +1914,6 @@ bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
- case Intrinsic::bitreverse:
- return visitBitreverseIntrinsicInst(I);
case Intrinsic::minnum:
case Intrinsic::minimumnum:
case Intrinsic::minimum:
@@ -2209,16 +1925,6 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
}
}
-bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
- bool Changed = false;
-
- if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
- UA.isUniform(&I))
- Changed |= promoteUniformBitreverseToI32(I);
-
- return Changed;
-}
-
/// Match non-nan fract pattern.
/// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0))
/// minimumnum(fsub(x, floor(x)), nextafter(1.0, -1.0))
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
index 32e461ba09f06..e1ef3f9be0a5d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
define amdgpu_ps i32 @s_andn2_i32(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_andn2_i32:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
index 12b37c386c140..afabc7b62386f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
define amdgpu_ps i32 @s_orn2_i32(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_orn2_i32:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
deleted file mode 100644
index 8945708e0f0ca..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
+++ /dev/null
@@ -1,2853 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -amdgpu-codegenprepare-widen-16-bit-ops -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=SI %s
-; RUN: opt -S -amdgpu-codegenprepare-widen-16-bit-ops -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=VI %s
-
-define amdgpu_kernel void @add_i3(i3 %a, i3 %b) {
-; SI-LABEL: @add_i3(
-; SI-NEXT: [[R:%.*]] = add i3 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i3 [[R]], ptr addrspace(1) poison, align 1
-; SI-NEXT: ret void
-;
-; VI-LABEL: @add_i3(
-; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
-; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
-; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
-; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
-; VI-NEXT: store volatile i3 [[TMP4]], ptr addrspace(1) poison, align 1
-; VI-NEXT: ret void
-;
- %r = add i3 %a, %b
- store volatile i3 %r, ptr addrspace(1) poison
- ret void
-}
-
-define amdgpu_kernel void @add_nsw_i3(i3 %a, i3 %b) {
-; SI-LABEL: @add_nsw_i3(
-; SI-NEXT: [[R:%.*]] = add nsw i3 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i3 [[R]], ptr addrspace(1) poison, align 1
-; SI-NEXT: ret void
-;
-; VI-LABEL: @add_nsw_i3(
-; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
-; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
-; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
-; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
-; VI-NEXT: store volatile i3 [[TMP4]], ptr addrspace(1) poison, align 1
-; VI-NEXT: ret void
-;
- %r = add nsw i3 %a, %b
- store volatile i3 %r, ptr addrspace(1) poison
- ret void
-}
-
-define amdgpu_kernel void @add_nuw_i3(i3 %a, i3 %b) {
-; SI-LABEL: @add_nuw_i3(
-; SI-NEXT: [[R:%.*]] = add nuw i3 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i3 [[R]], ptr addrspace(1) poison, align 1
-; SI-NEXT: ret void
-;
-; VI-LABEL: @add_nuw_i3(
-; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
-; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
-; VI-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP2]]
-; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
-; VI-NEXT: store volatile i3 [[TMP4]], ptr addrspace(1) poison, align 1
-; VI-NEXT: ret void
-;
- %r = add nuw i3 %a, %b
- store volatile i3 %r, ptr addrspace(1) poison
- ret void
-}
-
-define amdgpu_kernel void @add_nuw_nsw_i3(i3 %a, i3 %b) {
-; SI-LABEL: @add_nuw_nsw_i3(
-; SI-NEXT: [[R:%.*]] = add nuw nsw i3 [[A:%.*]], [[B:%.*]]
-; SI-NEXT: store volatile i3 [[R]], ptr addrspace(1) poison, align 1
-; SI-NEXT: ret void
-;
-; VI-LABEL: @add_nuw_nsw_i3(
-; VI-NEXT: [[TMP1:%.*]] = zext i3 [[A:%.*]] to i32
-; VI-NEXT: [[TMP2:%.*]] = zext i3 [[B:%.*]] to i32
-; VI-NEXT: [[TMP3:%.*]] = add nu...
[truncated]
``````````
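Since the diff above is truncated, here is a minimal sketch of the transform this PR removes, reconstructed from the deleted `promoteUniformOpToI32` and the VI check lines in the deleted test; the `@add_i16` kernel is illustrative, not taken from the patch:

```llvm
; Illustrative input (not from the patch): a uniform 16-bit add.
define amdgpu_kernel void @add_i16(i16 %a, i16 %b) {
  %r = add i16 %a, %b
  store volatile i16 %r, ptr addrspace(1) poison
  ret void
}

; With -amdgpu-codegenprepare-widen-16-bit-ops enabled, AMDGPUCodeGenPrepare
; rewrote the add by extending the operands to i32 and truncating back:
;   %ext0 = zext i16 %a to i32           ; zext, since add is not a signed op
;   %ext1 = zext i16 %b to i32
;   %wide = add nuw nsw i32 %ext0, %ext1 ; flags per promotedOpIsNSW/NUW
;   %r    = trunc i32 %wide to i16
; Div/rem were never promoted, and bitreverse additionally shifted the
; widened result right by (32 - source bit width) before truncating.
```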
https://github.com/llvm/llvm-project/pull/145483
More information about the llvm-commits mailing list