[llvm] [PowerPC] Add load/store support for v2048i1 and DMF cryptography instructions (PR #136145)
Maryam Moghadas via llvm-commits
llvm-commits at lists.llvm.org
Fri May 23 10:05:47 PDT 2025
https://github.com/maryammo updated https://github.com/llvm/llvm-project/pull/136145
>From a6905a453790f0a03b73bdebc6473916e505cb43 Mon Sep 17 00:00:00 2001
From: Maryam Moghadas <maryammo at ca.ibm.com>
Date: Wed, 16 Apr 2025 23:31:15 +0000
Subject: [PATCH 1/5] [PowerPC] Add load/store support for v2048i1 and DMF
cryptography instructions
This commit adds support for loading and storing v2048i1 DMR pairs and
introduces Dense Math Facility cryptography instructions: DMSHA2HASH, DMSHA3HASH,
and DMXXSHAPAD, along with their corresponding intrinsics and tests.
---
llvm/include/llvm/IR/Intrinsics.td | 1 +
llvm/include/llvm/IR/IntrinsicsPowerPC.td | 14 ++
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 144 +++++++++++---
llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td | 127 +++++++++++++
llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 2 +-
llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll | 176 ++++++++++++++++++
.../PowerPC/ppc-encoding-ISAFuture.txt | 42 +++++
.../PowerPC/ppc64le-encoding-ISAFuture.txt | 42 +++++
llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s | 56 ++++++
9 files changed, 575 insertions(+), 29 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index e1a135a5ad48e..50aad27f8fdc4 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -544,6 +544,7 @@ def llvm_v128i1_ty : LLVMType<v128i1>; // 128 x i1
def llvm_v256i1_ty : LLVMType<v256i1>; // 256 x i1
def llvm_v512i1_ty : LLVMType<v512i1>; // 512 x i1
def llvm_v1024i1_ty : LLVMType<v1024i1>; //1024 x i1
+def llvm_v2048i1_ty : LLVMType<v2048i1>; //2048 x i1
def llvm_v4096i1_ty : LLVMType<v4096i1>; //4096 x i1
def llvm_v1i8_ty : LLVMType<v1i8>; // 1 x i8
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 870d1190e696e..751628cee58c0 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1778,6 +1778,20 @@ let TargetPrefix = "ppc" in {
defm int_ppc_mma_pmdmxvf16gerx2 :
PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty]>;
+ def int_ppc_mma_dmsha2hash :
+ DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty,
+ llvm_v1024i1_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
+ def int_ppc_mma_dmsha3hash :
+ DefaultAttrsIntrinsic<[llvm_v2048i1_ty], [llvm_v2048i1_ty,
+ llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+ def int_ppc_mma_dmxxshapad :
+ DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty,
+ llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>,
+ ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
}
// XL Compat intrinsics.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 59bfec30dc211..108900bb71b8d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1361,8 +1361,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.isISAFuture()) {
addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
+ addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
} else {
addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
}
@@ -11890,15 +11893,19 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
SDValue LoadChain = LN->getChain();
SDValue BasePtr = LN->getBasePtr();
EVT VT = Op.getValueType();
+ bool IsV1024i1 = VT == MVT::v1024i1;
+ bool IsV2048i1 = VT == MVT::v2048i1;
- // Type v1024i1 is used for Dense Math dmr registers.
- assert(VT == MVT::v1024i1 && "Unsupported type.");
+ // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
+ // Dense Math dmr pair registers, respectively.
+ assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
"Dense Math support required.");
assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
- SmallVector<SDValue, 4> Loads;
- SmallVector<SDValue, 4> LoadChains;
+ SmallVector<SDValue, 8> Loads;
+ SmallVector<SDValue, 8> LoadChains;
+
SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
MachineMemOperand *MMO = LN->getMemOperand();
@@ -11934,10 +11941,40 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
+
SDValue Value =
SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
- SDValue RetOps[] = {Value, TF};
+ SDValue DmrPValue;
+ if (IsV2048i1) {
+ // This corresponds to v2048i1 which represents a dmr pair.
+ SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Loads[4],
+ Loads[5]), 0);
+ SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
+ Loads[6], Loads[7]), 0);
+ const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
+ SDValue Dmr1Value =
+ SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl,
+ MVT::v1024i1, Dmr1Ops), 0);
+
+ SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
+ SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
+
+ SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
+ const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
+
+ DmrPValue =
+ SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1,
+ DmrPOps), 0);
+ }
+
+ SDValue RetOps[2];
+ if (IsV1024i1)
+ RetOps[0] = Value;
+ else
+ RetOps[0] = DmrPValue;
+ RetOps[1] = TF;
+
return DAG.getMergeValues(RetOps, dl);
}
@@ -11949,7 +11986,7 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
SDValue BasePtr = LN->getBasePtr();
EVT VT = Op.getValueType();
- if (VT == MVT::v1024i1)
+ if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
return LowerDMFVectorLoad(Op, DAG);
if (VT != MVT::v256i1 && VT != MVT::v512i1)
@@ -11996,34 +12033,85 @@ SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
SDValue StoreChain = SN->getChain();
SDValue BasePtr = SN->getBasePtr();
- SmallVector<SDValue, 4> Values;
- SmallVector<SDValue, 4> Stores;
+ SmallVector<SDValue, 8> Values;
+ SmallVector<SDValue, 8> Stores;
EVT VT = SN->getValue().getValueType();
+ bool IsV1024i1 = VT == MVT::v1024i1;
+ bool IsV2048i1 = VT == MVT::v2048i1;
- // Type v1024i1 is used for Dense Math dmr registers.
- assert(VT == MVT::v1024i1 && "Unsupported type.");
+ // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
+ // Dense Math dmr pair registers, respectively.
+ assert((IsV1024i1 || IsV2048i1)&& "Unsupported type.");
assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
"Dense Math support required.");
assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
- SDValue Lo(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
- 0);
- SDValue Hi(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
- 0);
EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
- MachineSDNode *ExtNode =
- DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
- Values.push_back(SDValue(ExtNode, 0));
- Values.push_back(SDValue(ExtNode, 1));
- ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
- Values.push_back(SDValue(ExtNode, 0));
- Values.push_back(SDValue(ExtNode, 1));
+ if (IsV1024i1) {
+ SDValue Lo(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
+ 0);
+ SDValue Hi(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
+ 0);
+ MachineSDNode *ExtNode =
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ }
+ else {
+ // This corresponds to v2048i1 which represents a dmr pair.
+ SDValue Dmr0(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)), 0);
+
+ SDValue Dmr1(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)), 0);
+
+ SDValue Dmr0Lo(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Dmr0,
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)), 0);
+
+ SDValue Dmr0Hi(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Dmr0,
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)), 0);
+
+ SDValue Dmr1Lo(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Dmr1,
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)), 0);
+
+ SDValue Dmr1Hi(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Dmr1,
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)), 0);
+
+ MachineSDNode *ExtNode =
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ }
if (Subtarget.isLittleEndian())
std::reverse(Values.begin(), Values.end());
@@ -12062,7 +12150,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SDValue Value2 = SN->getValue();
EVT StoreVT = Value.getValueType();
- if (StoreVT == MVT::v1024i1)
+ if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
return LowerDMFVectorStore(Op, DAG);
if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
index def9609f46b05..d8d909507dc3b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
@@ -333,6 +333,63 @@ multiclass DMR_NEG_UM_M284_XOXORd11188<bits<6> opcode, bits<8> xo, dag IOL,
}
}
+class XForm_AT3_T1_AB3<bits<6> opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern>
+ : I <opcode, OOL, IOL, asmstr, NoItinerary> {
+ bits<3> AT;
+ bits<3> AB;
+ bits<1> T;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = AT{2-0};
+ let Inst{9} = 0;
+ let Inst{10} = T;
+ let Inst{11-15} = o;
+ let Inst{16-18} = AB{2-0};
+ let Inst{19-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_ATp2_SR5<bits<6> opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern>
+ : I <opcode, OOL, IOL, asmstr, NoItinerary> {
+ bits<2> ATp;
+ bits<5> SR;
+
+ let Pattern = pattern;
+
+ let Inst{6-7} = ATp{1-0};
+ let Inst{8-10} = 0;
+ let Inst{11-15} = o;
+ let Inst{16-20} = SR{4-0};
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XX2Form_AT3_XB6_ID2_E1_BL2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, NoItinerary> {
+ bits<3> AT;
+ bits<6> XB;
+ bits<2> ID;
+ bits<1> E;
+ bits<2> BL;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = AT{2-0};
+ let Inst{9-10} = 0;
+ let Inst{11-12} = ID{1-0};
+ let Inst{13} = E;
+ let Inst{14-15} = BL{1-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
let Predicates = [IsISAFuture] in {
def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226,
(outs vsrprc:$XAp, vsrprc:$XBp),
@@ -415,6 +472,25 @@ defm DMXVBF16GERX2 : DMR_NEG_UM_M284_XOXORf939a0<59, 74, (ins vsrprc:$XAp, vsrc:
defm DMXVF16GERX2 : DMR_NEG_UM_M284_XOXORd11188<59, 66, (ins vsrprc:$XAp, vsrc:$XB),
"dmxvf16gerx2", "$AT, $XAp, $XB">;
+// DMF cryptography [support] Instructions
+let Predicates = [IsISAFuture] in {
+ def DMSHA2HASH :
+ XForm_AT3_T1_AB3<31, 14, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB, u1imm:$T),
+ "dmsha2hash $AT, $AB, $T", []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+
+ def DMSHA3HASH :
+ XForm_ATp2_SR5<31, 15, 177, (outs dmrprc:$ATp), (ins dmrprc:$ATpi , u5imm:$SR),
+ "dmsha3hash $ATp, $SR", []>,
+ RegConstraint<"$ATpi = $ATp">, NoEncode<"$ATpi">;
+
+ def DMXXSHAPAD :
+ XX2Form_AT3_XB6_ID2_E1_BL2<60, 421, (outs dmr:$AT),
+ (ins dmr:$ATi, vsrc:$XB, u2imm:$ID, u1imm:$E, u2imm:$BL),
+ "dmxxshapad $AT, $XB, $ID, $E, $BL", []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+}
+
// MMA+ Intrinsics
let Predicates = [MMA, IsISAFuture] in {
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4 v256i1:$XAp, v16i8:$XB)),
@@ -532,3 +608,54 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
(PMDMXVF16GERX2NN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
}
+
+// Cryptography Intrinsics
+let Predicates = [IsISAFuture] in {
+ def : Pat<(v1024i1 (int_ppc_mma_dmsha2hash v1024i1:$ATi, v1024i1:$AB, timm:$T)),
+ (DMSHA2HASH $ATi, $AB, $T)>;
+
+ def : Pat<(v2048i1 (int_ppc_mma_dmsha3hash v2048i1:$ATpi, timm:$SR)),
+ (DMSHA3HASH $ATpi, $SR)>;
+
+ def : Pat<(v1024i1 (int_ppc_mma_dmxxshapad v1024i1:$ATi, v16i8:$XB, timm:$ID,
+ timm:$E, timm:$BL)), (DMXXSHAPAD $ATi, RCCp.BToVSRC, $ID, $E, $BL)>;
+}
+
+// MMA+ cryptography instruction aliases
+let Predicates = [IsISAFuture] in {
+ def : InstAlias<"dmsha256hash $AT, $AB",
+ (DMSHA2HASH dmr:$AT, dmr:$AB, 0)>;
+
+ def : InstAlias<"dmsha512hash $AT, $AB",
+ (DMSHA2HASH dmr:$AT, dmr:$AB, 1)>;
+
+ def : InstAlias<"dmsha3dw $ATp",
+ (DMSHA3HASH dmrprc:$ATp, 0)>;
+
+ def : InstAlias<"dmcryshash $ATp",
+ (DMSHA3HASH dmrprc:$ATp, 12)>;
+
+ def : InstAlias<"dmxxsha3512pad $AT, $XB, $E",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 0, u1imm:$E, 0)>;
+
+ def : InstAlias<"dmxxsha3384pad $AT, $XB, $E",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 0, u1imm:$E, 1)>;
+
+ def : InstAlias<"dmxxsha3256pad $AT, $XB, $E",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 0, u1imm:$E, 2)>;
+
+ def : InstAlias<"dmxxsha3224pad $AT, $XB, $E",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 0, u1imm:$E, 3)>;
+
+ def : InstAlias<"dmxxshake256pad $AT, $XB, $E",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 1, u1imm:$E, 0)>;
+
+ def : InstAlias<"dmxxshake128pad $AT, $XB, $E",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 1, u1imm:$E, 1)>;
+
+ def : InstAlias<"dmxxsha384512pad $AT, $XB",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 2, 0, 0)>;
+
+ def : InstAlias<"dmxxsha224256pad $AT, $XB",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 3, 0, 0)>;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 8b690b7b833b3..359adc31eb10b 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -1140,6 +1140,6 @@ def PPCRegDMRpRCAsmOperand : AsmOperandClass {
let PredicateMethod = "isDMRpRegNumber";
}
-def dmrp : RegisterOperand<DMRpRC> {
+def dmrprc : RegisterOperand<DMRpRC> {
let ParserMatchClass = PPCRegDMRpRCAsmOperand;
}
diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll b/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll
new file mode 100644
index 0000000000000..afa28144cbf65
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+declare <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1>, <1024 x i1>, i32)
+
+define dso_local void @test_dmsha2hash(ptr %vop, ptr %vinp, ptr %resp) {
+; CHECK-LABEL: test_dmsha2hash:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: lxvp vsp34, 0(r4)
+; CHECK-NEXT: lxvp vsp36, 32(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r4)
+; CHECK-NEXT: lxvp vsp36, 96(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT: dmsha256hash dmr0, dmr1
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmsha2hash:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: lxvp vsp34, 96(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmsha256hash dmr0, dmr1
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vop, align 64
+ %1 = load <1024 x i1>, ptr %vinp, align 64
+ %3 = tail call <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1> %0, <1024 x i1> %1, i32 0)
+ store <1024 x i1> %3, ptr %resp, align 64
+ ret void
+}
+
+declare <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1>, i32)
+
+define dso_local void @test_dmsha3hash(ptr %vopp, ptr %resp) {
+; CHECK-LABEL: test_dmsha3hash:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT: lxvp vsp34, 128(r3)
+; CHECK-NEXT: lxvp vsp36, 160(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 192(r3)
+; CHECK-NEXT: lxvp vsp36, 224(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: dmsha3hash dmrp0, 5
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 224(r4)
+; CHECK-NEXT: stxvp vsp36, 192(r4)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 160(r4)
+; CHECK-NEXT: stxvp vsp36, 128(r4)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-NEXT: stxvp vsp34, 96(r4)
+; CHECK-NEXT: stxvp vsp36, 64(r4)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-NEXT: stxvp vsp34, 32(r4)
+; CHECK-NEXT: stxvp vsp36, 0(r4)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmsha3hash:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 224(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 192(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 160(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 128(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmsha3hash dmrp0, 5
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-BE-NEXT: stxvp vsp36, 224(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 192(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-BE-NEXT: stxvp vsp36, 160(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 128(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r4)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <2048 x i1>, ptr %vopp, align 64
+ %2 = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> %0, i32 5)
+ store <2048 x i1> %2, ptr %resp, align 64
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmxxshapad(<1024 x i1>, <16 x i8>, i32, i32, i32)
+
+define dso_local void @test_dmxxshapad(ptr %vopp, ptr %vcp, ptr %resp) {
+; CHECK-LABEL: test_dmxxshapad:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: lxv vs0, 0(r4)
+; CHECK-NEXT: dmxxshapad dmr0, vs0, 2, 1, 3
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmxxshapad:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: lxv vs0, 0(r4)
+; CHECK-BE-NEXT: dmxxshapad dmr0, vs0, 2, 1, 3
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vopp, align 64
+ %1 = load <16 x i8>, ptr %vcp, align 64
+ %2 = tail call <1024 x i1> @llvm.ppc.mma.dmxxshapad(<1024 x i1> %0, <16 x i8> %1, i32 2, i32 1, i32 3)
+ store <1024 x i1> %2, ptr %resp, align 64
+ ret void
+}
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt
index 3936f4dc3806d..4bea42243f83b 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt
@@ -153,3 +153,45 @@
#CHECK: pmdmxvf16gerx2nn 0, 2, 4, 12, 5, 3
0x07,0x90,0xc0,0xc5,0xec,0x02,0x26,0x50
+
+#CHECK: dmsha256hash 0, 2
+0x7c,0x0e,0x41,0x62
+
+#CHECK: dmsha512hash 0, 2
+0x7c,0x2e,0x41,0x62
+
+#CHECK: dmsha3hash 0, 5
+0x7c,0x0f,0x29,0x62
+
+#CHECK: dmsha3dw 0
+0x7c,0x0f,0x01,0x62
+
+#CHECK: dmcryshash 0
+0x7c,0x0f,0x61,0x62
+
+#CHECK: dmxxshapad 0, 1, 2, 1, 3
+0xf0,0x17,0x0e,0x94
+
+#CHECK: dmxxsha3512pad 0, 1, 1
+0xf0,0x04,0x0e,0x94
+
+#CHECK: dmxxsha3384pad 0, 1, 1
+0xf0,0x05,0x0e,0x94
+
+#CHECK: dmxxsha3256pad 0, 1, 1
+0xf0,0x06,0x0e,0x94
+
+#CHECK: dmxxsha3224pad 0, 1, 1
+0xf0,0x07,0x0e,0x94
+
+#CHECK: dmxxshake256pad 0, 1, 1
+0xf0,0x0c,0x0e,0x94
+
+#CHECK: dmxxshake128pad 0, 1, 1
+0xf0,0x0d,0x0e,0x94
+
+#CHECK: dmxxsha384512pad 0, 1
+0xf0,0x10,0x0e,0x94
+
+#CHECK: dmxxsha224256pad 0, 1
+0xf0,0x18,0x0e,0x94
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt
index 6ac13ec08af54..233693e67292e 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt
@@ -147,3 +147,45 @@
#CHECK: pmdmxvf16gerx2nn 0, 2, 4, 12, 5, 3
0xc5,0xc0,0x90,0x07,0x50,0x26,0x02,0xec
+
+#CHECK: dmsha256hash 0, 2
+0x62,0x41,0x0e,0x7c
+
+#CHECK: dmsha512hash 0, 2
+0x62,0x41,0x2e,0x7c
+
+#CHECK: dmsha3hash 0, 5
+0x62,0x29,0x0f,0x7c
+
+#CHECK: dmsha3dw 0
+0x62,0x01,0x0f,0x7c
+
+#CHECK: dmcryshash 0
+0x62,0x61,0x0f,0x7c
+
+#CHECK: dmxxshapad 0, 1, 2, 1, 3
+0x94,0x0e,0x17,0xf0
+
+#CHECK: dmxxsha3512pad 0, 1, 1
+0x94,0x0e,0x04,0xf0
+
+#CHECK: dmxxsha3384pad 0, 1, 1
+0x94,0x0e,0x05,0xf0
+
+#CHECK: dmxxsha3256pad 0, 1, 1
+0x94,0x0e,0x06,0xf0
+
+#CHECK: dmxxsha3224pad 0, 1, 1
+0x94,0x0e,0x07,0xf0
+
+#CHECK: dmxxshake256pad 0, 1, 1
+0x94,0x0e,0x0c,0xf0
+
+#CHECK: dmxxshake128pad 0, 1, 1
+0x94,0x0e,0x0d,0xf0
+
+#CHECK: dmxxsha384512pad 0, 1
+0x94,0x0e,0x10,0xf0
+
+#CHECK: dmxxsha224256pad 0, 1
+0x94,0x0e,0x18,0xf0
diff --git a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s
index 984ddf7eff0a6..cba93291e4595 100644
--- a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s
+++ b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s
@@ -226,3 +226,59 @@
#CHECK-BE-SAME: 0xec,0x02,0x26,0x50]
#CHECK-LE: pmdmxvf16gerx2nn 0, 2, 4, 12, 5, 3 # encoding: [0xc5,0xc0,0x90,0x07,
#CHECK-LE-SAME: 0x50,0x26,0x02,0xec]
+
+ dmsha2hash 0, 2, 0
+#CHECK-BE: dmsha256hash 0, 2 # encoding: [0x7c,0x0e,0x41,0x62]
+#CHECK-LE: dmsha256hash 0, 2 # encoding: [0x62,0x41,0x0e,0x7c]
+
+ dmsha2hash 0, 2, 1
+#CHECK-BE: dmsha512hash 0, 2 # encoding: [0x7c,0x2e,0x41,0x62]
+#CHECK-LE: dmsha512hash 0, 2 # encoding: [0x62,0x41,0x2e,0x7c]
+
+ dmsha3hash 0, 5
+#CHECK-BE: dmsha3hash 0, 5 # encoding: [0x7c,0x0f,0x29,0x62]
+#CHECK-LE: dmsha3hash 0, 5 # encoding: [0x62,0x29,0x0f,0x7c]
+
+ dmsha3dw 0
+#CHECK-BE: dmsha3dw 0 # encoding: [0x7c,0x0f,0x01,0x62]
+#CHECK-LE: dmsha3dw 0 # encoding: [0x62,0x01,0x0f,0x7c]
+
+ dmcryshash 0
+#CHECK-BE: dmcryshash 0 # encoding: [0x7c,0x0f,0x61,0x62]
+#CHECK-LE: dmcryshash 0 # encoding: [0x62,0x61,0x0f,0x7c]
+
+ dmxxshapad 0, 1, 2, 1, 3
+#CHECK-BE: dmxxshapad 0, 1, 2, 1, 3 # encoding: [0xf0,0x17,0x0e,0x94]
+#CHECK-LE: dmxxshapad 0, 1, 2, 1, 3 # encoding: [0x94,0x0e,0x17,0xf0]
+
+ dmxxsha3512pad 0, 1, 1
+#CHECK-BE: dmxxsha3512pad 0, 1, 1 # encoding: [0xf0,0x04,0x0e,0x94]
+#CHECK-LE: dmxxsha3512pad 0, 1, 1 # encoding: [0x94,0x0e,0x04,0xf0]
+
+ dmxxsha3384pad 0, 1, 1
+#CHECK-BE: dmxxsha3384pad 0, 1, 1 # encoding: [0xf0,0x05,0x0e,0x94]
+#CHECK-LE: dmxxsha3384pad 0, 1, 1 # encoding: [0x94,0x0e,0x05,0xf0]
+
+ dmxxsha3256pad 0, 1, 1
+#CHECK-BE: dmxxsha3256pad 0, 1, 1 # encoding: [0xf0,0x06,0x0e,0x94]
+#CHECK-LE: dmxxsha3256pad 0, 1, 1 # encoding: [0x94,0x0e,0x06,0xf0]
+
+ dmxxsha3224pad 0, 1, 1
+#CHECK-BE: dmxxsha3224pad 0, 1, 1 # encoding: [0xf0,0x07,0x0e,0x94]
+#CHECK-LE: dmxxsha3224pad 0, 1, 1 # encoding: [0x94,0x0e,0x07,0xf0]
+
+ dmxxshake256pad 0, 1, 1
+#CHECK-BE: dmxxshake256pad 0, 1, 1 # encoding: [0xf0,0x0c,0x0e,0x94]
+#CHECK-LE: dmxxshake256pad 0, 1, 1 # encoding: [0x94,0x0e,0x0c,0xf0]
+
+ dmxxshake128pad 0, 1, 1
+#CHECK-BE: dmxxshake128pad 0, 1, 1 # encoding: [0xf0,0x0d,0x0e,0x94]
+#CHECK-LE: dmxxshake128pad 0, 1, 1 # encoding: [0x94,0x0e,0x0d,0xf0]
+
+ dmxxsha384512pad 0, 1
+#CHECK-BE: dmxxsha384512pad 0, 1 # encoding: [0xf0,0x10,0x0e,0x94]
+#CHECK-LE: dmxxsha384512pad 0, 1 # encoding: [0x94,0x0e,0x10,0xf0]
+
+ dmxxsha224256pad 0, 1
+#CHECK-BE: dmxxsha224256pad 0, 1 # encoding: [0xf0,0x18,0x0e,0x94]
+#CHECK-LE: dmxxsha224256pad 0, 1 # encoding: [0x94,0x0e,0x18,0xf0]
>From c86e2283a1bbdb5a53d0126301233622af8083f0 Mon Sep 17 00:00:00 2001
From: Maryam Moghadas <maryammo at ca.ibm.com>
Date: Thu, 17 Apr 2025 15:25:39 +0000
Subject: [PATCH 2/5] Fix formatting
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 131 ++++++++++----------
1 file changed, 67 insertions(+), 64 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 108900bb71b8d..e8f10c885d929 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11948,14 +11948,15 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
SDValue DmrPValue;
if (IsV2048i1) {
// This corresponds to v2048i1 which represents a dmr pair.
- SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Loads[4],
- Loads[5]), 0);
+ SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1,
+ Loads[4], Loads[5]),
+ 0);
SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
- Loads[6], Loads[7]), 0);
+ Loads[6], Loads[7]),
+ 0);
const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
- SDValue Dmr1Value =
- SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl,
- MVT::v1024i1, Dmr1Ops), 0);
+ SDValue Dmr1Value = SDValue(
+ DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
@@ -11963,16 +11964,15 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
- DmrPValue =
- SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1,
- DmrPOps), 0);
+ DmrPValue = SDValue(
+ DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
}
SDValue RetOps[2];
if (IsV1024i1)
RetOps[0] = Value;
- else
- RetOps[0] = DmrPValue;
+ else
+ RetOps[0] = DmrPValue;
RetOps[1] = TF;
return DAG.getMergeValues(RetOps, dl);
@@ -12041,23 +12041,23 @@ SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
// The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
// Dense Math dmr pair registers, respectively.
- assert((IsV1024i1 || IsV2048i1)&& "Unsupported type.");
+ assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
"Dense Math support required.");
assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
if (IsV1024i1) {
- SDValue Lo(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
- 0);
- SDValue Hi(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
- 0);
+ SDValue Lo(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
+ 0);
+ SDValue Hi(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
+ 0);
MachineSDNode *ExtNode =
DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
Values.push_back(SDValue(ExtNode, 0));
@@ -12065,52 +12065,55 @@ SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
Values.push_back(SDValue(ExtNode, 0));
Values.push_back(SDValue(ExtNode, 1));
- }
- else {
+ } else {
// This corresponds to v2048i1 which represents a dmr pair.
SDValue Dmr0(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)), 0);
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
+ 0);
SDValue Dmr1(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)), 0);
-
- SDValue Dmr0Lo(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Dmr0,
- DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)), 0);
-
- SDValue Dmr0Hi(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Dmr0,
- DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)), 0);
-
- SDValue Dmr1Lo(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Dmr1,
- DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)), 0);
-
- SDValue Dmr1Hi(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Dmr1,
- DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)), 0);
-
- MachineSDNode *ExtNode =
- DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
- Values.push_back(SDValue(ExtNode, 0));
- Values.push_back(SDValue(ExtNode, 1));
- ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
- Values.push_back(SDValue(ExtNode, 0));
- Values.push_back(SDValue(ExtNode, 1));
- ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
- Values.push_back(SDValue(ExtNode, 0));
- Values.push_back(SDValue(ExtNode, 1));
- ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
- Values.push_back(SDValue(ExtNode, 0));
- Values.push_back(SDValue(ExtNode, 1));
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
+ 0);
+
+ SDValue Dmr0Lo(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
+ 0);
+
+ SDValue Dmr0Hi(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
+ 0);
+
+ SDValue Dmr1Lo(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
+ 0);
+
+ SDValue Dmr1Hi(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
+ 0);
+
+ MachineSDNode *ExtNode =
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode =
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode =
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
}
if (Subtarget.isLittleEndian())
>From 1e796ca24154f395734c066fa1f44548afc61849 Mon Sep 17 00:00:00 2001
From: Maryam Moghadas <maryammo at ca.ibm.com>
Date: Thu, 15 May 2025 17:30:00 +0000
Subject: [PATCH 3/5] Address review: combine instruction defs with builtin
patterns where possible
---
llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td | 22 ++++++++------------
1 file changed, 9 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
index d8d909507dc3b..71c6eac0860ea 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
@@ -476,19 +476,21 @@ defm DMXVF16GERX2 : DMR_NEG_UM_M284_XOXORd11188<59, 66, (ins vsrprc:$XAp, vsrc:$
let Predicates = [IsISAFuture] in {
def DMSHA2HASH :
XForm_AT3_T1_AB3<31, 14, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB, u1imm:$T),
- "dmsha2hash $AT, $AB, $T", []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ "dmsha2hash $AT, $AB, $T",
+ [(set v1024i1:$AT, (int_ppc_mma_dmsha2hash v1024i1:$ATi, v1024i1:$AB, timm:$T))]>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def DMSHA3HASH :
XForm_ATp2_SR5<31, 15, 177, (outs dmrprc:$ATp), (ins dmrprc:$ATpi , u5imm:$SR),
- "dmsha3hash $ATp, $SR", []>,
- RegConstraint<"$ATpi = $ATp">, NoEncode<"$ATpi">;
+ "dmsha3hash $ATp, $SR",
+ [(set v2048i1:$ATp, (int_ppc_mma_dmsha3hash v2048i1:$ATpi, timm:$SR))]>,
+ RegConstraint<"$ATpi = $ATp">, NoEncode<"$ATpi">;
def DMXXSHAPAD :
XX2Form_AT3_XB6_ID2_E1_BL2<60, 421, (outs dmr:$AT),
(ins dmr:$ATi, vsrc:$XB, u2imm:$ID, u1imm:$E, u2imm:$BL),
- "dmxxshapad $AT, $XB, $ID, $E, $BL", []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ "dmxxshapad $AT, $XB, $ID, $E, $BL", []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
// MMA+ Intrinsics
@@ -609,14 +611,8 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
}
-// cryptography Intrinsics
+// Cryptography Intrinsic
let Predicates = [IsISAFuture] in {
- def : Pat<(v1024i1 (int_ppc_mma_dmsha2hash v1024i1:$ATi, v1024i1:$AB, timm:$T)),
- (DMSHA2HASH $ATi, $AB, $T)>;
-
- def : Pat<(v2048i1 (int_ppc_mma_dmsha3hash v2048i1:$ATpi, timm:$SR)),
- (DMSHA3HASH $ATpi, $SR)>;
-
def : Pat<(v1024i1 (int_ppc_mma_dmxxshapad v1024i1:$ATi, v16i8:$XB, timm:$ID,
timm:$E, timm:$BL)), (DMXXSHAPAD $ATi, RCCp.BToVSRC, $ID, $E, $BL)>;
}
>From b15ff115d451f6c40a42bde2574f2950c581c461 Mon Sep 17 00:00:00 2001
From: Maryam Moghadas <maryammo at ca.ibm.com>
Date: Tue, 20 May 2025 16:33:49 +0000
Subject: [PATCH 4/5] Address review comments on LowerDMFVectorLoad
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 47 ++++++++++-----------
1 file changed, 22 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e8f10c885d929..5956a1111b408 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11945,36 +11945,33 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
SDValue Value =
SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
- SDValue DmrPValue;
- if (IsV2048i1) {
- // This corresponds to v2048i1 which represents a dmr pair.
- SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1,
- Loads[4], Loads[5]),
- 0);
- SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
- Loads[6], Loads[7]),
- 0);
- const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
- SDValue Dmr1Value = SDValue(
- DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
+ if (IsV1024i1) {
+ SDValue RetOps[] = {Value, TF};
+ return DAG.getMergeValues(RetOps, dl);
+ }
- SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
- SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
+ // Handle Loads for V2048i1 which represents a dmr pair.
+ SDValue DmrPValue;
+ SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1,
+ Loads[4], Loads[5]),
+ 0);
+ SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
+ Loads[6], Loads[7]),
+ 0);
+ const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
+ SDValue Dmr1Value = SDValue(
+ DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
- SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
- const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
+ SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
+ SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
- DmrPValue = SDValue(
- DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
- }
+ SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
+ const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
- SDValue RetOps[2];
- if (IsV1024i1)
- RetOps[0] = Value;
- else
- RetOps[0] = DmrPValue;
- RetOps[1] = TF;
+ DmrPValue = SDValue(
+ DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
+ SDValue RetOps[] = {DmrPValue, TF};
return DAG.getMergeValues(RetOps, dl);
}
>From e086f739ce22ff1364c34a8de712c897cc847fc0 Mon Sep 17 00:00:00 2001
From: Maryam Moghadas <maryammo at ca.ibm.com>
Date: Fri, 23 May 2025 14:47:38 +0000
Subject: [PATCH 5/5] Address review comments
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5956a1111b408..7fc625c3a4e6c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11946,8 +11946,7 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
if (IsV1024i1) {
- SDValue RetOps[] = {Value, TF};
- return DAG.getMergeValues(RetOps, dl);
+ return DAG.getMergeValues({Value, TF}, dl);
}
// Handle Loads for V2048i1 which represents a dmr pair.
@@ -11971,8 +11970,7 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
DmrPValue = SDValue(
DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
- SDValue RetOps[] = {DmrPValue, TF};
- return DAG.getMergeValues(RetOps, dl);
+ return DAG.getMergeValues({DmrPValue, TF}, dl);
}
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
More information about the llvm-commits
mailing list