[llvm] b2f3863 - [Hexagon] Added v32i1/v64i1 to v32f32/v64f16 lowering (#159355)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 17 07:49:31 PDT 2025
Author: pkarveti
Date: 2025-09-17T09:49:27-05:00
New Revision: b2f38637cd3635a186da9d3a8776fe0b395f01de
URL: https://github.com/llvm/llvm-project/commit/b2f38637cd3635a186da9d3a8776fe0b395f01de
DIFF: https://github.com/llvm/llvm-project/commit/b2f38637cd3635a186da9d3a8776fe0b395f01de.diff
LOG: [Hexagon] Added v32i1/v64i1 to v32f32/v64f16 lowering (#159355)
This patch introduces uint_to_fp conversions from v32i1 and v64i1
predicate vectors to v32f32 and v64f16 floating-point vectors.
Patch-by: Santanu Das
Co-authored-by: quic-santdas <quic_santdas at quicinc.com>
Added:
llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll
llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll
Modified:
llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
llvm/lib/Target/Hexagon/HexagonISelLowering.h
llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index c54b67ccd8843..9f7f434b66fa1 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3352,7 +3352,6 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
-
// Handle INLINEASM first.
if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
return LowerINLINEASM(Op, DAG);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 9ebbbc6399b42..8d04edbea5b43 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -577,6 +577,8 @@ class HexagonTargetLowering : public TargetLowering {
SDValue LowerHvxFpExtend(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxPred32ToFp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxPred64ToFp(SDValue Op, SelectionDAG &DAG) const;
SDValue ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const;
SDValue ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index ff02a67d54363..d0dfa47468705 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -446,6 +446,10 @@ HexagonTargetLowering::initializeHVXLowering() {
}
}
+ // Include cases which are not handled earlier
+ setOperationAction(ISD::UINT_TO_FP, MVT::v32i1, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v64i1, Custom);
+
setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT});
}
@@ -2333,6 +2337,123 @@ HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
return ExpandHvxFpToInt(Op, DAG);
}
+// Lowers uint_to_fp from v32i1 to v32f32; PredOp is the uint_to_fp node:
+// R1 = #1, R2 = operand 0 of PredOp (the v32i1 value) bitcast to i32
+// V1 = vsplat(R1)          ; V6_lvsplatw of the constant 1
+// V2 = vsplat(R2)          ; V6_lvsplatw of the predicate bits
+// Q0 = vand(V1,R1)         ; V6_vandvrt
+// V0.w = prefixsum(Q0)     ; V6_vprefixqw
+// V0.w = vsub(V0.w,V1.w)   ; V6_vsubw (presumably the lane index - confirm)
+// V2.w = vlsr(V2.w,V0.w)   ; V6_vlsrwv, per-lane shift amounts come from V0
+// V2 = vand(V2,V1)         ; V6_vand with the splatted 1
+// V2.sf = V2.w             ; V6_vconv_sf_w, result type is ty(PredOp)
+SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
+ SelectionDAG &DAG) const {
+
+ MVT ResTy = ty(PredOp);
+ const SDLoc &dl(PredOp);
+
+ SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
+ SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
+ SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+ SDValue(RegConst, 0));
+ SDNode *PredTransfer =
+ DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
+ SDValue(SplatConst, 0), SDValue(RegConst, 0));
+ SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
+ SDValue(PredTransfer, 0));
+ SDNode *SplatParam = DAG.getMachineNode(
+ Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
+ SDNode *Vsub =
+ DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
+ SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
+ SDNode *IndexShift =
+ DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
+ SDValue(SplatParam, 0), SDValue(Vsub, 0));
+ SDNode *MaskOff =
+ DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
+ SDValue(IndexShift, 0), SDValue(SplatConst, 0));
+ SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
+ SDValue(MaskOff, 0));
+ return SDValue(Convert, 0);
+}
+
+// Lowers uint_to_fp from v64i1 to v64f16; PredOp is the uint_to_fp node:
+// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1, operand 0 of PredOp)
+// R3 = subreg_high (R32)
+// R2 = subreg_low (R32)
+// R1 = #1
+// V1 = vsplat(R1)
+// V2 = vsplat(R2)
+// V3 = vsplat(R3)
+// Q0 = vand(V1,R1)          ; V6_vandvrt
+// V0.w = prefixsum(Q0)      ; V6_vprefixqw
+// V0.w = vsub(V0.w,V1.w)    ; V6_vsubw (presumably the lane index - confirm)
+// V2.w = vlsr(V2.w,V0.w)    ; V6_vlsrwv
+// V3.w = vlsr(V3.w,V0.w)    ; V6_vlsrwv
+// V2 = vand(V2,V1)          ; V6_vand
+// V3 = vand(V3,V1)          ; V6_vand
+// V2.h = vpacke(V3.w,V2.w)  ; V6_vpackeh, hi half is the first operand
+// V2.hf = V2.h              ; V6_vconv_hf_h, result type is ty(PredOp)
+SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
+ SelectionDAG &DAG) const {
+
+ MVT ResTy = ty(PredOp);
+ const SDLoc &dl(PredOp);
+
+ SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
+ // Split the i64 into its high (isub_hi) and low (isub_lo) i32 halves
+ SDValue HiReg =
+ DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
+ SDValue LoReg =
+ DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
+ // Materialize constant #1 in a register (A2_tfrsi) and splat it into V1
+ SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
+ SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
+ SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+ SDValue(RegConst, 0));
+ // Splat hi/lo; NOTE(review): i32 BITCAST of an i32 looks redundant - confirm
+ SDNode *SplatHi =
+ DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
+ SDNode *SplatLo =
+ DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
+ // Q0: V6_vandvrt of the splatted constant with the scalar constant
+ SDNode *PredTransfer =
+ DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
+ SDValue(SplatConst, 0), SDValue(RegConst, 0));
+ // V0: prefix sum over the predicate Q0 (V6_vprefixqw)
+ SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
+ SDValue(PredTransfer, 0));
+ // Subtract the splatted 1 from the prefix sum; shared by both halves below
+ SDNode *Vsub =
+ DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
+ SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
+ // Shift the hi and lo splats right by the per-lane amounts held in Vsub
+ SDNode *IndexShift_hi =
+ DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
+ SDValue(SplatHi, 0), SDValue(Vsub, 0));
+ SDNode *IndexShift_lo =
+ DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
+ SDValue(SplatLo, 0), SDValue(Vsub, 0));
+ // AND both shifted vectors with the splatted 1
+ SDNode *MaskOff_hi =
+ DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
+ SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
+ SDNode *MaskOff_lo =
+ DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
+ SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
+ // Pack the two v32i32 vectors into one v64i16 (V6_vpackeh), hi operand first
+ SDNode *Pack =
+ DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
+ SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
+ SDNode *Convert =
+ DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
+ return SDValue(Convert, 0);
+}
+
SDValue
HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
// Catch invalid conversion ops (just in case).
@@ -2343,6 +2464,13 @@ HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
MVT FpTy = ResTy.getVectorElementType();
+ if (Op.getOpcode() == ISD::UINT_TO_FP) {
+ if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
+ return LowerHvxPred32ToFp(Op, DAG);
+ if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
+ return LowerHvxPred64ToFp(Op, DAG);
+ }
+
if (Subtarget.useHVXIEEEFPOps()) {
// There are only conversions to f16.
if (FpTy == MVT::f16) {
diff --git a/llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll
new file mode 100644
index 0000000000000..dfb2bc83537dc
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v32i1tov32f32.ll
@@ -0,0 +1,25 @@
+; Tests lowering of v32i1 to v32f32
+
+; RUN: llc -march=hexagon -mattr=+hvxv79,+hvx-length128b,+hvx-ieee-fp \
+; RUN: -stop-after=hexagon-isel %s -o - | FileCheck %s
+
+; CHECK: [[R0:%[0-9]+]]:hvxvr = V6_lvsplatw killed %{{[0-9]+}}
+; CHECK-NEXT: [[R1:%[0-9]+]]:intregs = A2_tfrsi 1
+; CHECK-NEXT: [[R2:%[0-9]+]]:hvxvr = V6_lvsplatw [[R1]]
+; CHECK-NEXT: [[R3:%[0-9]+]]:hvxqr = V6_vandvrt [[R2]], [[R1]]
+; CHECK-NEXT: [[R4:%[0-9]+]]:hvxvr = V6_vprefixqw killed [[R3]]
+; CHECK-NEXT: [[R5:%[0-9]+]]:hvxvr = V6_vsubw killed [[R4]], [[R2]]
+; CHECK-NEXT: [[R6:%[0-9]+]]:hvxvr = V6_vlsrwv killed [[R0]], killed [[R5]]
+; CHECK-NEXT: [[R7:%[0-9]+]]:hvxvr = V6_vand killed [[R6]], [[R2]]
+; CHECK-NEXT: [[R8:%[0-9]+]]:hvxvr = V6_vconv_sf_w killed [[R7]]
+; CHECK-NEXT: hvxvr = V6_vadd_sf_sf [[R8]], [[R8]]
+
+define <32 x float> @uitofp_i1(<32 x i16> %in0, <32 x i16> %in1) #0
+{
+ %q1 = icmp eq <32 x i16> %in0, %in1
+ %fp0 = uitofp <32 x i1> %q1 to <32 x float>
+ %out = fadd <32 x float> %fp0, %fp0
+ ret <32 x float> %out
+}
+
+attributes #0 = { nounwind readnone "target-cpu"="hexagonv79" "target-features"="+hvxv79,+hvx-length128b" }
diff --git a/llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll
new file mode 100644
index 0000000000000..8769e345655e9
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/isel-uinttofp-v64i1tov64f16.ll
@@ -0,0 +1,27 @@
+; Tests the conversion pattern for v64i1 to v64f16
+; r0, r3 and r9 registers are i32 types converted from
+; v32i1 via a bitcasting sequence.
+
+; RUN: llc -march=hexagon -mattr=+hvxv79,+hvx-length128b \
+; RUN: %s -verify-machineinstrs -o - | FileCheck %s
+
+; CHECK: [[V3:v[0-9]+]] = vsplat([[R0:r[0-9]+]])
+; CHECK: [[Q0:q[0-9]+]] = vand([[V3]],[[R0]])
+; CHECK: [[V4:v[0-9]+]].w = prefixsum([[Q0]])
+; CHECK: [[V5:v[0-9]+]].w = vsub([[V4]].w,[[V3]].w)
+; CHECK: [[V1:v[0-9]+]] = vsplat(r
+; CHECK: [[V2:v[0-9]+]] = vsplat(r
+; CHECK: [[V6:v[0-9]+]].w = vlsr([[V1]].w,[[V5]].w)
+; CHECK: [[V7:v[0-9]+]].w = vlsr([[V2]].w,[[V5]].w)
+; CHECK: [[V8:v[0-9]+]] = vand([[V6]],[[V3]])
+; CHECK: [[V9:v[0-9]+]] = vand([[V7]],[[V3]])
+; CHECK: [[V10:v[0-9]+]].h = vpacke([[V9]].w,[[V8]].w)
+; CHECK: .hf = [[V10]].h
+
+define <64 x half> @uitofp_i1(<64 x i16> %in0, <64 x i16> %in1)
+{
+ %in = icmp eq <64 x i16> %in0, %in1
+ %fp0 = uitofp <64 x i1> %in to <64 x half>
+ %out = fadd <64 x half> %fp0, %fp0
+ ret <64 x half> %out
+}
More information about the llvm-commits
mailing list