[llvm] 6803d08 - [DAG][PowerPC] Enable initial ISD::BITCAST SimplifyDemandedBits/SimplifyMultipleUseDemandedBits big-endian handling
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 2 03:48:07 PST 2021
Author: Simon Pilgrim
Date: 2021-12-02T11:47:53Z
New Revision: 6803d08c38af7e3235e6cd249761511704e0f88d
URL: https://github.com/llvm/llvm-project/commit/6803d08c38af7e3235e6cd249761511704e0f88d
DIFF: https://github.com/llvm/llvm-project/commit/6803d08c38af7e3235e6cd249761511704e0f88d.diff
LOG: [DAG][PowerPC] Enable initial ISD::BITCAST SimplifyDemandedBits/SimplifyMultipleUseDemandedBits big-endian handling
This patch begins extending the handling for peeking through bitcast nodes so that it covers big-endian targets in addition to the existing little-endian case.
Differential Revision: https://reviews.llvm.org/D114676
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a6018dc3f490..737695b5eabe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -664,15 +664,15 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
- // TODO - bigendian once we have test coverage.
- if (IsLE && SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
+ if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
- unsigned Offset = i * NumSrcEltBits;
- APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
+ unsigned BitOffset = EltOffset * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
@@ -2166,15 +2166,15 @@ bool TargetLowering::SimplifyDemandedBits(
// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
// Demand the elt/bit if any of the original elts/bits are demanded.
- // TODO - bigendian once we have test coverage.
- if (IsLE && SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
+ if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
- unsigned Offset = i * NumSrcEltBits;
- APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
+ unsigned BitOffset = EltOffset * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
@@ -2193,6 +2193,7 @@ bool TargetLowering::SimplifyDemandedBits(
KnownSrcBits, TLO, Depth + 1))
return true;
} else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
+ // TODO - bigendian once we have test coverage.
unsigned Scale = NumSrcEltBits / BitWidth;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
index 844f4a506dc1..023a7ba8a06e 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
@@ -337,15 +337,14 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret(<8 x float>) %a
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxlxor v3, v3, v3
-; CHECK-BE-NEXT: vmrglh v3, v3, v2
-; CHECK-BE-NEXT: vmrghh v2, v2, v2
+; CHECK-BE-NEXT: vmrghh v3, v2, v2
+; CHECK-BE-NEXT: vmrglh v2, v2, v2
; CHECK-BE-NEXT: vextsh2w v3, v3
; CHECK-BE-NEXT: vextsh2w v2, v2
; CHECK-BE-NEXT: xvcvsxwsp vs0, v3
; CHECK-BE-NEXT: xvcvsxwsp vs1, v2
-; CHECK-BE-NEXT: stxv vs0, 16(r3)
-; CHECK-BE-NEXT: stxv vs1, 0(r3)
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: blr
entry:
%0 = sitofp <8 x i16> %a to <8 x float>
@@ -409,25 +408,24 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret(<16 x float>)
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v2, 16(r4)
; CHECK-BE-NEXT: lxv v3, 0(r4)
-; CHECK-BE-NEXT: xxlxor v4, v4, v4
-; CHECK-BE-NEXT: vmrglh v5, v4, v3
-; CHECK-BE-NEXT: vmrglh v4, v4, v2
-; CHECK-BE-NEXT: vmrghh v3, v3, v3
-; CHECK-BE-NEXT: vmrghh v2, v2, v2
-; CHECK-BE-NEXT: vextsh2w v5, v5
+; CHECK-BE-NEXT: lxv v2, 16(r4)
+; CHECK-BE-NEXT: vmrghh v4, v3, v3
+; CHECK-BE-NEXT: vmrglh v3, v3, v3
+; CHECK-BE-NEXT: vextsh2w v3, v3
; CHECK-BE-NEXT: vextsh2w v4, v4
+; CHECK-BE-NEXT: xvcvsxwsp vs1, v3
+; CHECK-BE-NEXT: vmrghh v3, v2, v2
+; CHECK-BE-NEXT: vmrglh v2, v2, v2
+; CHECK-BE-NEXT: xvcvsxwsp vs0, v4
; CHECK-BE-NEXT: vextsh2w v3, v3
; CHECK-BE-NEXT: vextsh2w v2, v2
-; CHECK-BE-NEXT: xvcvsxwsp vs0, v5
-; CHECK-BE-NEXT: xvcvsxwsp vs1, v4
; CHECK-BE-NEXT: xvcvsxwsp vs2, v3
; CHECK-BE-NEXT: xvcvsxwsp vs3, v2
-; CHECK-BE-NEXT: stxv vs1, 48(r3)
-; CHECK-BE-NEXT: stxv vs3, 32(r3)
-; CHECK-BE-NEXT: stxv vs0, 16(r3)
-; CHECK-BE-NEXT: stxv vs2, 0(r3)
+; CHECK-BE-NEXT: stxv vs1, 16(r3)
+; CHECK-BE-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-NEXT: stxv vs3, 48(r3)
+; CHECK-BE-NEXT: stxv vs2, 32(r3)
; CHECK-BE-NEXT: blr
entry:
%a = load <16 x i16>, <16 x i16>* %0, align 32
diff --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
index 97e2dd122b76..939b18e5384b 100644
--- a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll
@@ -190,7 +190,7 @@ define double @test10(<4 x i32> %a, <4 x i32> %b) {
; CHECK-BE-LABEL: test10:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis 3, 2, .LCPI9_0@toc@ha
-; CHECK-BE-NEXT: vmrghw 3, 3, 2
+; CHECK-BE-NEXT: vmrghw 3, 3, 3
; CHECK-BE-NEXT: lfs 0, .LCPI9_0@toc@l(3)
; CHECK-BE-NEXT: vmrglw 2, 3, 2
; CHECK-BE-NEXT: xsadddp 1, 34, 0
More information about the llvm-commits mailing list