[llvm] r206120 - [PowerPC] Implement some additional TLI callbacks

Hal Finkel hfinkel at anl.gov
Sat Apr 12 16:34:31 PDT 2014


----- Original Message -----
> From: "Hal Finkel" <hfinkel at anl.gov>
> To: "Benjamin Kramer" <benny.kra at gmail.com>
> Cc: llvm-commits at cs.uiuc.edu
> Sent: Saturday, April 12, 2014 6:25:51 PM
> Subject: Re: [llvm] r206120 - [PowerPC] Implement some additional TLI callbacks
> 
> ----- Original Message -----
> > From: "Benjamin Kramer" <benny.kra at gmail.com>
> > To: "Hal Finkel" <hfinkel at anl.gov>
> > Cc: llvm-commits at cs.uiuc.edu
> > Sent: Saturday, April 12, 2014 6:20:58 PM
> > Subject: Re: [llvm] r206120 - [PowerPC] Implement some additional
> > TLI callbacks
> > 
> > 
> > On 12.04.2014, at 23:52, Hal Finkel <hfinkel at anl.gov> wrote:
> > 
> > > Author: hfinkel
> > > Date: Sat Apr 12 16:52:38 2014
> > > New Revision: 206120
> > > 
> > > URL: http://llvm.org/viewvc/llvm-project?rev=206120&view=rev
> > > Log:
> > > [PowerPC] Implement some additional TLI callbacks
> > > 
> > > Add implementations of:
> > >  bool isLegalICmpImmediate(int64_t Imm) const
> > >  bool isLegalAddImmediate(int64_t Imm) const
> > >  bool isTruncateFree(Type *Ty1, Type *Ty2) const
> > >  bool isTruncateFree(EVT VT1, EVT VT2) const
> > >  bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type
> > >  *Ty)
> > >  const
> > > 
> > > Unfortunately, this regresses counter-register-based loop
> > > formation
> > > because
> > > > some of the loops now end up in forms where SE cannot compute loop
> > > counts.
> > > However, nevertheless, the test-suite results favor committing:
> > 
> > Can we teach SE to understand those forms?
> 
> Yes, I don't see why not.
> 
> For the purposes of discussion, here's one:
> 
> target datalayout = "E-m:e-i64:64-n32:64"
> target triple = "powerpc64-unknown-linux-gnu"
> 
> define void @test_pos1_ir_sle(i8* nocapture %p, i32 %a, i32 %b)
> nounwind {
> entry:
>   %cmp3 = icmp sle i32 28395, %b
>   br i1 %cmp3, label %for.body.lr.ph, label %for.end
> 
> for.body.lr.ph:                                   ; preds = %entry
>   br label %for.body
> 
> for.body:                                         ; preds =
> %for.body.lr.ph, %for.body
>   %i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ]
>   %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
>   %0 = load i8* %arrayidx, align 1
>   %conv = zext i8 %0 to i32
>   %add = add nsw i32 %conv, 1
>   %conv1 = trunc i32 %add to i8
>   store i8 %conv1, i8* %arrayidx, align 1
>   %inc = add nsw i32 %i.04, 1
>   %cmp = icmp sle i32 %inc, %b
>   br i1 %cmp, label %for.body, label %for.end
> 
> for.end:                                          ; preds =
> %for.body, %entry
>   ret void
> }

So that is how the loop appeared before the change, when SE could understand it:

Printing analysis 'Scalar Evolution Analysis' for function 'test_pos1_ir_sle':
Classifying expressions for: @test_pos1_ir_sle
  %i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ]
  -->  {28395,+,1}<nuw><nsw><%for.body>		Exits: %b
...
Determining loop execution counts for: @test_pos1_ir_sle
Loop %for.body: backedge-taken count is (-28395 + %b)

Now it appears like this:

target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

define void @test_pos1_ir_sle(i8* nocapture %p, i32 %a, i32 %b) #0 {
entry:
  %cmp3 = icmp sle i32 28395, %b
  br i1 %cmp3, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 28395, %entry ]
  %scevgep1 = getelementptr i8* %p, i64 %lsr.iv
  %0 = load i8* %scevgep1, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  %scevgep = getelementptr i8* %p, i64 %lsr.iv
  store i8 %conv1, i8* %scevgep, align 1
  %lsr.iv.next = add nuw nsw i64 %lsr.iv, 1
  %tmp = trunc i64 %lsr.iv.next to i32
  %cmp = icmp sle i32 %tmp, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

and SE does not give a backedge-taken count:

Printing analysis 'Scalar Evolution Analysis' for function 'test_pos1_ir_sle':
Classifying expressions for: @test_pos1_ir_sle
  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 28395, %entry ]
  -->  {28395,+,1}<nuw><nsw><%for.body>		Exits: <<Unknown>>
...
Determining loop execution counts for: @test_pos1_ir_sle
Loop %for.body: Unpredictable backedge-taken count. 

 -Hal

> 
>  -Hal
> 
> > 
> > - Ben
> > 
> > > 
> > > SingleSource/Benchmarks/BenchmarkGame/puzzle: 26% speedup
> > > MultiSource/Benchmarks/FreeBench/analyzer/analyzer: 21% speedup
> > > MultiSource/Benchmarks/MiBench/automotive-susan/automotive-susan:
> > > 20% speedup
> > > SingleSource/Benchmarks/Polybench/linear-algebra/kernels/trisolv/trisolv:
> > > 19% speedup
> > > SingleSource/Benchmarks/Polybench/linear-algebra/kernels/gesummv/gesummv:
> > > 15% speedup
> > > MultiSource/Benchmarks/FreeBench/pcompress2/pcompress2: 2%
> > > speedup
> > > 
> > > MultiSource/Benchmarks/VersaBench/bmm/bmm: 26% slowdown
> > > 
> > > Modified:
> > >    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
> > >    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
> > >    llvm/trunk/test/CodeGen/PowerPC/ctrloop-le.ll
> > >    llvm/trunk/test/CodeGen/PowerPC/ctrloop-lt.ll
> > >    llvm/trunk/test/CodeGen/PowerPC/mcm-10.ll
> > >    llvm/trunk/test/CodeGen/PowerPC/mcm-11.ll
> > >    llvm/trunk/test/CodeGen/PowerPC/mcm-obj-2.ll
> > > 
> > > Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
> > > URL:
> > > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=206120&r1=206119&r2=206120&view=diff
> > > ==============================================================================
> > > --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
> > > +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Sat Apr 12
> > > 16:52:38 2014
> > > @@ -8795,6 +8795,42 @@ EVT PPCTargetLowering::getOptimalMemOpTy
> > >   }
> > > }
> > > 
> > > +/// \brief Returns true if it is beneficial to convert a load of
> > > a
> > > constant
> > > +/// to just the constant itself.
> > > +bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const
> > > APInt &Imm,
> > > +                                                          Type
> > > *Ty) const {
> > > +  assert(Ty->isIntegerTy());
> > > +
> > > +  unsigned BitSize = Ty->getPrimitiveSizeInBits();
> > > +  if (BitSize == 0 || BitSize > 64)
> > > +    return false;
> > > +  return true;
> > > +}
> > > +
> > > +bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2)
> > > const
> > > {
> > > +  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
> > > +    return false;
> > > +  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
> > > +  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
> > > +  return NumBits1 == 64 && NumBits2 == 32;
> > > +}
> > > +
> > > +bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
> > > +  if (!VT1.isInteger() || !VT2.isInteger())
> > > +    return false;
> > > +  unsigned NumBits1 = VT1.getSizeInBits();
> > > +  unsigned NumBits2 = VT2.getSizeInBits();
> > > +  return NumBits1 == 64 && NumBits2 == 32;
> > > +}
> > > +
> > > +bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const
> > > {
> > > +  return isInt<16>(Imm) || isUInt<16>(Imm);
> > > +}
> > > +
> > > +bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
> > > +  return isInt<16>(Imm) || isUInt<16>(Imm);
> > > +}
> > > +
> > > bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
> > >                                                       unsigned,
> > >                                                       bool *Fast)
> > >                                                       const {
> > > 
> > > Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
> > > URL:
> > > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=206120&r1=206119&r2=206120&view=diff
> > > ==============================================================================
> > > --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
> > > +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Sat Apr 12
> > > 16:52:38 2014
> > > @@ -447,6 +447,29 @@ namespace llvm {
> > >     /// by AM is legal for this target, for a load/store of the
> > >     specified type.
> > >     virtual bool isLegalAddressingMode(const AddrMode &AM, Type
> > >     *Ty)const;
> > > 
> > > +    /// isLegalICmpImmediate - Return true if the specified
> > > immediate is legal
> > > +    /// icmp immediate, that is the target has icmp instructions
> > > which can
> > > +    /// compare a register against the immediate without having
> > > to
> > > materialize
> > > +    /// the immediate into a register.
> > > +    bool isLegalICmpImmediate(int64_t Imm) const override;
> > > +
> > > +    /// isLegalAddImmediate - Return true if the specified
> > > immediate is legal
> > > +    /// add immediate, that is the target has add instructions
> > > which can
> > > +    /// add a register and the immediate without having to
> > > materialize
> > > +    /// the immediate into a register.
> > > +    bool isLegalAddImmediate(int64_t Imm) const override;
> > > +
> > > +    /// isTruncateFree - Return true if it's free to truncate a
> > > value of
> > > +    /// type Ty1 to type Ty2. e.g. On PPC it's free to truncate
> > > a
> > > i64 value in
> > > +    /// register X1 to i32 by referencing its sub-register R1.
> > > +    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
> > > +    bool isTruncateFree(EVT VT1, EVT VT2) const override;
> > > +
> > > +    /// \brief Returns true if it is beneficial to convert a
> > > load
> > > of a constant
> > > +    /// to just the constant itself.
> > > +    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
> > > +                                           Type *Ty) const
> > > override;
> > > +
> > >     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode
> > >     *GA) const;
> > > 
> > >     /// getOptimalMemOpType - Returns the target specific optimal
> > >     type for load
> > > 
> > > Modified: llvm/trunk/test/CodeGen/PowerPC/ctrloop-le.ll
> > > URL:
> > > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ctrloop-le.ll?rev=206120&r1=206119&r2=206120&view=diff
> > > ==============================================================================
> > > --- llvm/trunk/test/CodeGen/PowerPC/ctrloop-le.ll (original)
> > > +++ llvm/trunk/test/CodeGen/PowerPC/ctrloop-le.ll Sat Apr 12
> > > 16:52:38 2014
> > > @@ -2,6 +2,9 @@ target datalayout = "E-p:64:64:64-i1:8:8
> > > target triple = "powerpc64-unknown-linux-gnu"
> > > ; RUN: llc < %s -march=ppc64 | FileCheck %s
> > > 
> > > +; XFAIL: *
> > > +; SE needs improvement
> > > +
> > > ; CHECK: test_pos1_ir_sle
> > > ; CHECK: bdnz
> > > ; a < b
> > > 
> > > Modified: llvm/trunk/test/CodeGen/PowerPC/ctrloop-lt.ll
> > > URL:
> > > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ctrloop-lt.ll?rev=206120&r1=206119&r2=206120&view=diff
> > > ==============================================================================
> > > --- llvm/trunk/test/CodeGen/PowerPC/ctrloop-lt.ll (original)
> > > +++ llvm/trunk/test/CodeGen/PowerPC/ctrloop-lt.ll Sat Apr 12
> > > 16:52:38 2014
> > > @@ -2,6 +2,9 @@ target datalayout = "E-p:64:64:64-i1:8:8
> > > target triple = "powerpc64-unknown-linux-gnu"
> > > ; RUN: llc < %s -march=ppc64 | FileCheck %s
> > > 
> > > +; XFAIL: *
> > > +; SE needs improvement
> > > +
> > > ; CHECK: test_pos1_ir_slt
> > > ; CHECK: bdnz
> > > ; a < b
> > > 
> > > Modified: llvm/trunk/test/CodeGen/PowerPC/mcm-10.ll
> > > URL:
> > > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/mcm-10.ll?rev=206120&r1=206119&r2=206120&view=diff
> > > ==============================================================================
> > > --- llvm/trunk/test/CodeGen/PowerPC/mcm-10.ll (original)
> > > +++ llvm/trunk/test/CodeGen/PowerPC/mcm-10.ll Sat Apr 12 16:52:38
> > > 2014
> > > @@ -18,7 +18,8 @@ entry:
> > > 
> > > ; CHECK-LABEL: test_fn_static:
> > > > ; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
> > > > -; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
> > > > +; CHECK: lwa {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
> > > > +; CHECK-NOT: extsw
> > > > ; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
> > > > ; CHECK: .type [[VAR]],@object
> > > ; CHECK: .local [[VAR]]
> > > 
> > > Modified: llvm/trunk/test/CodeGen/PowerPC/mcm-11.ll
> > > URL:
> > > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/mcm-11.ll?rev=206120&r1=206119&r2=206120&view=diff
> > > ==============================================================================
> > > --- llvm/trunk/test/CodeGen/PowerPC/mcm-11.ll (original)
> > > +++ llvm/trunk/test/CodeGen/PowerPC/mcm-11.ll Sat Apr 12 16:52:38
> > > 2014
> > > @@ -18,7 +18,8 @@ entry:
> > > 
> > > ; CHECK-LABEL: test_file_static:
> > > > ; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
> > > > -; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
> > > > +; CHECK: lwa {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
> > > > +; CHECK-NOT: extsw
> > > > ; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
> > > > ; CHECK: .type [[VAR]],@object
> > > ; CHECK: .data
> > > 
> > > Modified: llvm/trunk/test/CodeGen/PowerPC/mcm-obj-2.ll
> > > URL:
> > > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/mcm-obj-2.ll?rev=206120&r1=206119&r2=206120&view=diff
> > > ==============================================================================
> > > --- llvm/trunk/test/CodeGen/PowerPC/mcm-obj-2.ll (original)
> > > +++ llvm/trunk/test/CodeGen/PowerPC/mcm-obj-2.ll Sat Apr 12
> > > 16:52:38 2014
> > > @@ -22,7 +22,7 @@ entry:
> > > ; CHECK: Relocations [
> > > ; CHECK:   Section (2) .rela.text {
> > > ; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM2:[^ ]+]]
> > > -; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
> > > +; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM2]]
> > > ; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
> > > 
> > > @gi = global i32 5, align 4
> > > @@ -39,7 +39,7 @@ entry:
> > > ; accessing file-scope variable gi.
> > > ;
> > > ; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM3:[^ ]+]]
> > > -; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM3]]
> > > +; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM3]]
> > > ; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM3]]
> > > 
> > > define double @test_double_const() nounwind {
> > > 
> > > 
> > > _______________________________________________
> > > llvm-commits mailing list
> > > llvm-commits at cs.uiuc.edu
> > > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> > 
> > 
> 
> --
> Hal Finkel
> Assistant Computational Scientist
> Leadership Computing Facility
> Argonne National Laboratory
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 

-- 
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory



More information about the llvm-commits mailing list