[llvm] 530f0e6 - [AMDGPU] Replace `isInlinableLiteral16` with specific version (#81345)
via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 4 05:40:46 PST 2024
Author: Shilei Tian
Date: 2024-03-04T08:40:42-05:00
New Revision: 530f0e64ec11327879c44f2fd55c7c28efdbaa2d
URL: https://github.com/llvm/llvm-project/commit/530f0e64ec11327879c44f2fd55c7c28efdbaa2d
DIFF: https://github.com/llvm/llvm-project/commit/530f0e64ec11327879c44f2fd55c7c28efdbaa2d.diff
LOG: [AMDGPU] Replace `isInlinableLiteral16` with specific version (#81345)
Added:
Modified:
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.h
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/test/CodeGen/AMDGPU/immv216.ll
llvm/test/CodeGen/AMDGPU/inline-constraints.ll
llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
Removed:
################################################################################
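The gist of the change: the catch-all AMDGPU::isInlinableLiteral16 is split into isInlinableLiteralI16, isInlinableLiteralFP16, and isInlinableLiteralBF16, and callers now pick the variant that matches the operand type. Below is a minimal sketch of that dispatch, modeled on the checkAsmConstraintValA hunk in SIISelLowering.cpp further down; the wrapper function and the include paths are illustrative only and are not part of the commit (include locations vary across LLVM versions).

// Illustrative sketch: choose the type-specific 16-bit inline-literal check.
// Not part of the commit; mirrors the new switch in checkAsmConstraintValA.
#include "llvm/CodeGenTypes/MachineValueType.h" // MVT (path may differ by version)
#include "Utils/AMDGPUBaseInfo.h"               // AMDGPU::isInlinableLiteral{I16,FP16,BF16}

using namespace llvm;

static bool isInlinable16BitLiteral(MVT VT, int64_t Val, bool HasInv2Pi) {
  switch (VT.SimpleTy) {
  case MVT::i16:
    return AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
  case MVT::f16:
    return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
  case MVT::bf16:
    return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
  default:
    return false; // wider and packed types are handled by other helpers
  }
}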
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index cb4eddfe5320fa..10999d846e3bb2 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -2006,8 +2006,12 @@ static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
return isInlinableIntLiteral(Val);
}
- // f16/v2f16 operands work correctly for all values.
- return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
+ if (VT.getScalarType() == MVT::f16)
+ return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
+
+ assert(VT.getScalarType() == MVT::bf16);
+
+ return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
}
bool AMDGPUOperand::isInlinableImm(MVT type) const {
@@ -2375,15 +2379,26 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;
case AMDGPU::OPERAND_REG_IMM_INT16:
- case AMDGPU::OPERAND_REG_IMM_FP16:
- case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ if (isSafeTruncation(Val, 16) &&
+ AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
+ Inst.addOperand(MCOperand::createImm(Val));
+ setImmKindConst();
+ return;
+ }
+
+ Inst.addOperand(MCOperand::createImm(Val & 0xffff));
+ setImmKindLiteral();
+ return;
+
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
if (isSafeTruncation(Val, 16) &&
- AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
- AsmParser->hasInv2PiInlineImm())) {
+ AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
+ AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
setImmKindConst();
return;
@@ -2410,12 +2425,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
+ assert(isSafeTruncation(Val, 16));
+ assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
+ Inst.addOperand(MCOperand::createImm(Val));
+ return;
+ }
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
assert(isSafeTruncation(Val, 16));
- assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
- AsmParser->hasInv2PiInlineImm()));
+ assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
+ AsmParser->hasInv2PiInlineImm()));
Inst.addOperand(MCOperand::createImm(Val));
return;
@@ -3559,7 +3579,19 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
return AMDGPU::isInlinableLiteralV2BF16(Val);
- return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
+ if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
+ OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
+ return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
+
+ if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 ||
+ OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED)
+ return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
+
+ llvm_unreachable("invalid operand type");
}
default:
llvm_unreachable("invalid operand size");
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index a32be1e50a6053..683e8dad796c10 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -460,10 +460,8 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
}
}
-// This must accept a 32-bit immediate value to correctly handle packed 16-bit
-// operations.
-static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
- raw_ostream &O) {
+static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
if (Imm == 0x3C00)
O << "1.0";
else if (Imm == 0xBC00)
@@ -529,9 +527,9 @@ void AMDGPUInstPrinter::printImmediateBF16(uint32_t Imm,
O << formatHex(static_cast<uint64_t>(Imm));
}
-void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+void AMDGPUInstPrinter::printImmediateF16(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
int16_t SImm = static_cast<int16_t>(Imm);
if (isInlinableIntLiteral(SImm)) {
O << SImm;
@@ -539,7 +537,7 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
}
uint16_t HImm = static_cast<uint16_t>(Imm);
- if (printImmediateFloat16(HImm, STI, O))
+ if (printImmediateFP16(HImm, STI, O))
return;
uint64_t Imm16 = static_cast<uint16_t>(Imm);
@@ -566,7 +564,7 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
if (isUInt<16>(Imm) &&
- printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
+ printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
return;
break;
case AMDGPU::OPERAND_REG_IMM_V2BF16:
@@ -845,7 +843,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
- printImmediate16(Op.getImm(), STI, O);
+ printImmediateF16(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 15ecbf2e5e5918..c801eaf1111e2f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -86,10 +86,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
raw_ostream &O);
void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
- raw_ostream &O);
void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printImmediateF16(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printImmediateV216(uint32_t Imm, uint8_t OpType,
const MCSubtargetInfo &STI, raw_ostream &O);
bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0df0b5cdf0f392..9476c33acc34ac 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15425,16 +15425,32 @@ bool SITargetLowering::checkAsmConstraintVal(SDValue Op, StringRef Constraint,
llvm_unreachable("Invalid asm constraint");
}
-bool SITargetLowering::checkAsmConstraintValA(SDValue Op,
- uint64_t Val,
+bool SITargetLowering::checkAsmConstraintValA(SDValue Op, uint64_t Val,
unsigned MaxSize) const {
unsigned Size = std::min<unsigned>(Op.getScalarValueSizeInBits(), MaxSize);
bool HasInv2Pi = Subtarget->hasInv2PiInlineImm();
- if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) ||
- (Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
- (Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) {
+ if (Size == 16) {
+ MVT VT = Op.getSimpleValueType();
+ switch (VT.SimpleTy) {
+ default:
+ return false;
+ case MVT::i16:
+ return AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
+ case MVT::f16:
+ return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
+ case MVT::bf16:
+ return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
+ case MVT::v2i16:
+ return AMDGPU::getInlineEncodingV2I16(Val).has_value();
+ case MVT::v2f16:
+ return AMDGPU::getInlineEncodingV2F16(Val).has_value();
+ case MVT::v2bf16:
+ return AMDGPU::getInlineEncodingV2BF16(Val).has_value();
+ }
+ }
+ if ((Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
+ (Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi)))
return true;
- }
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 31ced9d41e15e2..edd87e340d10d2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4121,13 +4121,32 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
ST.hasInv2PiInlineImm());
case 16:
return ST.has16BitInsts() &&
- AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
- ST.hasInv2PiInlineImm());
+ AMDGPU::isInlinableLiteralI16(Imm.getSExtValue(),
+ ST.hasInv2PiInlineImm());
default:
llvm_unreachable("invalid bitwidth");
}
}
+bool SIInstrInfo::isInlineConstant(const APFloat &Imm) const {
+ APInt IntImm = Imm.bitcastToAPInt();
+ int64_t IntImmVal = IntImm.getSExtValue();
+ bool HasInv2Pi = ST.hasInv2PiInlineImm();
+ switch (APFloat::SemanticsToEnum(Imm.getSemantics())) {
+ default:
+ llvm_unreachable("invalid fltSemantics");
+ case APFloatBase::S_IEEEsingle:
+ case APFloatBase::S_IEEEdouble:
+ return isInlineConstant(IntImm);
+ case APFloatBase::S_BFloat:
+ return ST.has16BitInsts() &&
+ AMDGPU::isInlinableLiteralBF16(IntImmVal, HasInv2Pi);
+ case APFloatBase::S_IEEEhalf:
+ return ST.has16BitInsts() &&
+ AMDGPU::isInlinableLiteralFP16(IntImmVal, HasInv2Pi);
+ }
+}
+
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
uint8_t OperandType) const {
assert(!MO.isReg() && "isInlineConstant called on register operand!");
@@ -4200,7 +4219,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
// constants in these cases
int16_t Trunc = static_cast<int16_t>(Imm);
return ST.has16BitInsts() &&
- AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
+ AMDGPU::isInlinableLiteralFP16(Trunc, ST.hasInv2PiInlineImm());
}
return false;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 82c6117292aee1..dab2cb2946ac97 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -984,9 +984,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
bool isInlineConstant(const APInt &Imm) const;
- bool isInlineConstant(const APFloat &Imm) const {
- return isInlineConstant(Imm.bitcastToAPInt());
- }
+ bool isInlineConstant(const APFloat &Imm) const;
// Returns true if this non-register operand definitely does not need to be
// encoded as a 32-bit literal. Note that this function handles all kinds of
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 963dc2882fcc0d..63285c06edaf2c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2647,13 +2647,19 @@ bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
Val == 0x3E22; // 1.0 / (2.0 * pi)
}
-bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
+bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi) {
if (!HasInv2Pi)
return false;
-
if (isInlinableIntLiteral(Literal))
return true;
+ return Literal == static_cast<int16_t>(0x3e22f983);
+}
+bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
+ if (!HasInv2Pi)
+ return false;
+ if (isInlinableIntLiteral(Literal))
+ return true;
uint16_t Val = static_cast<uint16_t>(Literal);
return Val == 0x3C00 || // 1.0
Val == 0xBC00 || // -1.0
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 6edf01d1217f2d..9fcb4caca30b01 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1397,7 +1397,13 @@ LLVM_READNONE
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
LLVM_READNONE
-bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
+bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
+
+LLVM_READNONE
+bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
+
+LLVM_READNONE
+bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi);
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
diff --git a/llvm/test/CodeGen/AMDGPU/immv216.ll b/llvm/test/CodeGen/AMDGPU/immv216.ll
index b66ca71a327495..ae51c3edf1c7e7 100644
--- a/llvm/test/CodeGen/AMDGPU/immv216.ll
+++ b/llvm/test/CodeGen/AMDGPU/immv216.ll
@@ -577,40 +577,40 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x h
}
; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800
-; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
+; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3800
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
-; GFX10: v_pk_mul_lo_u16 v0, 0x38003800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x38]
+; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xb800b800
-; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
+; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xb800
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
-; GFX10: v_pk_mul_lo_u16 v0, 0xb800b800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0xb8]
+; GFX10: v_pk_mul_lo_u16 v0, 0xffffb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0xff,0xff]
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
-; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
+; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3c00
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
-; GFX10: v_pk_mul_lo_u16 v0, 0x3c003c00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x3c]
+; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00bc00
-; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
+; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xbc00
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
-; GFX10: v_pk_mul_lo_u16 v0, 0xbc00bc00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0xbc]
+; GFX10: v_pk_mul_lo_u16 v0, 0xffffbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0xff,0xff]
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
ret <2 x i16> %y
@@ -635,10 +635,10 @@ define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
}
; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004400
-; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
+; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4400
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
-; GFX10: v_pk_mul_lo_u16 v0, 0x44004400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x44]
+; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
ret <2 x i16> %y
@@ -646,20 +646,20 @@ define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
}
; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc400c400
-; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
+; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xc400
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
-; GFX10: v_pk_mul_lo_u16 v0, 0xc400c400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0xc4]
+; GFX10: v_pk_mul_lo_u16 v0, 0xffffc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0xff,0xff]
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x31183118
-; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
+; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3118
+; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]
-; GFX10: v_pk_mul_lo_u16 v0, 0x31183118, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x18,0x31]
+; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
ret <2 x i16> %y
diff --git a/llvm/test/CodeGen/AMDGPU/inline-constraints.ll b/llvm/test/CodeGen/AMDGPU/inline-constraints.ll
index 9ef246fe2e1015..7bd6b037386b04 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-constraints.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-constraints.ll
@@ -97,7 +97,6 @@ define i32 @inline_A_constraint_H1() {
; NOSI: error: invalid operand for inline asm constraint 'A'
; VI-LABEL: {{^}}inline_A_constraint_H2:
-; VI: v_mov_b32 {{v[0-9]+}}, 0x3c00
define i32 @inline_A_constraint_H2() {
%v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 bitcast (half 1.0 to i16))
ret i32 %v0
@@ -105,7 +104,6 @@ define i32 @inline_A_constraint_H2() {
; NOSI: error: invalid operand for inline asm constraint 'A'
; VI-LABEL: {{^}}inline_A_constraint_H3:
-; VI: v_mov_b32 {{v[0-9]+}}, 0xbc00
define i32 @inline_A_constraint_H3() {
%v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 bitcast (half -1.0 to i16))
ret i32 %v0
@@ -113,7 +111,6 @@ define i32 @inline_A_constraint_H3() {
; NOSI: error: invalid operand for inline asm constraint 'A'
; VI-LABEL: {{^}}inline_A_constraint_H4:
-; VI: v_mov_b32 {{v[0-9]+}}, 0x3118
define i32 @inline_A_constraint_H4() {
%v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(half 0xH3118)
ret i32 %v0
@@ -121,7 +118,6 @@ define i32 @inline_A_constraint_H4() {
; NOSI: error: invalid operand for inline asm constraint 'A'
; VI-LABEL: {{^}}inline_A_constraint_H5:
-; VI: v_mov_b32 {{v[0-9]+}}, 0x3118
define i32 @inline_A_constraint_H5() {
%v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(i16 bitcast (half 0xH3118 to i16))
ret i32 %v0
@@ -129,7 +125,6 @@ define i32 @inline_A_constraint_H5() {
; NOSI: error: invalid operand for inline asm constraint 'A'
; VI-LABEL: {{^}}inline_A_constraint_H6:
-; VI: v_mov_b32 {{v[0-9]+}}, 0xb800
define i32 @inline_A_constraint_H6() {
%v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(half -0.5)
ret i32 %v0
@@ -293,7 +288,6 @@ define i32 @inline_A_constraint_V0() {
; NOSI: error: invalid operand for inline asm constraint 'A'
; VI-LABEL: {{^}}inline_A_constraint_V1:
-; VI: v_mov_b32 {{v[0-9]+}}, 0xb800
define i32 @inline_A_constraint_V1() {
%v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,A"(<2 x half> <half -0.5, half -0.5>)
ret i32 %v0
@@ -970,7 +964,6 @@ define i32 @inline_DA_constraint_H1() {
; NOSI: error: invalid operand for inline asm constraint 'DA'
; VI-LABEL: {{^}}inline_DA_constraint_H2:
-; VI: v_mov_b32 {{v[0-9]+}}, 0x3c00
define i32 @inline_DA_constraint_H2() {
%v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,^DA"(i16 bitcast (half 1.0 to i16))
ret i32 %v0
@@ -978,7 +971,6 @@ define i32 @inline_DA_constraint_H2() {
; NOSI: error: invalid operand for inline asm constraint 'DA'
; VI-LABEL: {{^}}inline_DA_constraint_H3:
-; VI: v_mov_b32 {{v[0-9]+}}, 0xbc00
define i32 @inline_DA_constraint_H3() {
%v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,^DA"(i16 bitcast (half -1.0 to i16))
ret i32 %v0
@@ -986,7 +978,6 @@ define i32 @inline_DA_constraint_H3() {
; NOSI: error: invalid operand for inline asm constraint 'DA'
; VI-LABEL: {{^}}inline_DA_constraint_H4:
-; VI: v_mov_b32 {{v[0-9]+}}, 0x3118
define i32 @inline_DA_constraint_H4() {
%v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,^DA"(half 0xH3118)
ret i32 %v0
@@ -994,7 +985,6 @@ define i32 @inline_DA_constraint_H4() {
; NOSI: error: invalid operand for inline asm constraint 'DA'
; VI-LABEL: {{^}}inline_DA_constraint_H5:
-; VI: v_mov_b32 {{v[0-9]+}}, 0x3118
define i32 @inline_DA_constraint_H5() {
%v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,^DA"(i16 bitcast (half 0xH3118 to i16))
ret i32 %v0
@@ -1002,7 +992,6 @@ define i32 @inline_DA_constraint_H5() {
; NOSI: error: invalid operand for inline asm constraint 'DA'
; VI-LABEL: {{^}}inline_DA_constraint_H6:
-; VI: v_mov_b32 {{v[0-9]+}}, 0xb800
define i32 @inline_DA_constraint_H6() {
%v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,^DA"(half -0.5)
ret i32 %v0
@@ -1164,7 +1153,6 @@ define i32 @inline_DA_constraint_V0() {
; NOSI: error: invalid operand for inline asm constraint 'DA'
; VI-LABEL: {{^}}inline_DA_constraint_V1:
-; VI: v_mov_b32 {{v[0-9]+}}, 0xb800
define i32 @inline_DA_constraint_V1() {
%v0 = tail call i32 asm "v_mov_b32 $0, $1", "=v,^DA"(<2 x half> <half -0.5, half -0.5>)
ret i32 %v0
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index 5de9b0b92c9a02..1a55bf608ebf51 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -3400,9 +3400,9 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(ptr addrspace(1) %out, p
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_mov_b32 s2, 0xc400c400
+; GFX9-SDAG-NEXT: s_movk_i32 s2, 0xc400
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, s2
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, s2 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-SDAG-NEXT: s_endpgm
;
@@ -3418,29 +3418,53 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(ptr addrspace(1) %out, p
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX10-LABEL: v_test_v2i16_x_add_neg_fpone:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_add_u16 v1, 0xc400c400, v1
-; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-NEXT: s_endpgm
+; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg_fpone:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_add_u16 v1, 0xffffc400, v1 op_sel_hi:[0,1]
+; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-SDAG-NEXT: s_endpgm
;
-; GFX11-LABEL: v_test_v2i16_x_add_neg_fpone:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_add_u16 v1, 0xc400c400, v1
-; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT: s_nop 0
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-NEXT: s_endpgm
+; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg_fpone:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xc400c400, v1
+; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_fpone:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_add_u16 v1, 0xffffc400, v1 op_sel_hi:[0,1]
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
+; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_fpone:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xc400c400, v1
+; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
+; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -3541,9 +3565,9 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(ptr addrspace(1) %out
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_mov_b32 s2, 0x44004400
+; GFX9-SDAG-NEXT: s_movk_i32 s2, 0x4400
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, s2
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, s2 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-SDAG-NEXT: s_endpgm
;
@@ -3559,29 +3583,53 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(ptr addrspace(1) %out
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX10-LABEL: v_test_v2i16_x_add_neg_negfpone:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_add_u16 v1, 0x44004400, v1
-; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-NEXT: s_endpgm
+; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg_negfpone:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_add_u16 v1, 0x4400, v1 op_sel_hi:[0,1]
+; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-SDAG-NEXT: s_endpgm
;
-; GFX11-LABEL: v_test_v2i16_x_add_neg_negfpone:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_add_u16 v1, 0x44004400, v1
-; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT: s_nop 0
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-NEXT: s_endpgm
+; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg_negfpone:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0x44004400, v1
+; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_negfpone:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_add_u16 v1, 0x4400, v1 op_sel_hi:[0,1]
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_nop 0
+; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_negfpone:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0x44004400, v1
+; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-GISEL-NEXT: s_nop 0
+; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext