[PATCHES] AMDGPU/SI: Small ISel improvements

Wed Oct 14 11:32:15 PDT 2015

On 10/14/2015 11:15 AM, Marek Olšák via llvm-commits wrote:
> Please review.
>
> Marek
>
> 0001-AMDGPU-SI-use-S_AND-for-i1-trunc.patch
>
>
>  From d7b62060d11f2d1c303438b45bb7d29b8ba012ca Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=<marek.olsak at amd.com>
> Date: Wed, 7 Oct 2015 02:51:54 +0200
> Subject: [PATCH 1/4] AMDGPU/SI: use S_AND for i1 trunc
>
> ---
>   lib/Target/AMDGPU/SIInstructions.td | 4 ++--
>   test/CodeGen/AMDGPU/trunc.ll        | 8 ++++----
>   2 files changed, 6 insertions(+), 6 deletions(-)
>
LGTM

>
> 0002-AMDGPU-SI-use-S_OR-for-fneg-fabs-f32.patch
>
>
>  From 17a7e3f389813d7823179c5f6f5d7794edd97650 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=<marek.olsak at amd.com>
> Date: Wed, 7 Oct 2015 03:02:42 +0200
> Subject: [PATCH 2/4] AMDGPU/SI: use S_OR for fneg (fabs f32)
>
> ---
>   lib/Target/AMDGPU/SIInstructions.td |  3 +--
>   test/CodeGen/AMDGPU/fneg-fabs.ll    | 27 +++++++++------------------
>   2 files changed, 10 insertions(+), 20 deletions(-)
LGTM

>
> 0003-AMDGPU-SI-select-S_ABS_I32-when-possible.patch
>
>
>  From 96d649f157b8e77d2313f8d2f7a1b27a15279145 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=<marek.olsak at amd.com>
> Date: Sat, 10 Oct 2015 21:23:23 +0200
> Subject: [PATCH 3/4] AMDGPU/SI: select S_ABS_I32 when possible
>
> ---
>   lib/Target/AMDGPU/SIInstrInfo.cpp      | 25 +++++++++++++++++++++++++
>   lib/Target/AMDGPU/SIInstructions.td    |  5 +++++
>   test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll |  4 +---
>   test/CodeGen/AMDGPU/sminmax.ll         | 24 ++++++++++++++++++++++++
>   4 files changed, 55 insertions(+), 3 deletions(-)
>   create mode 100644 test/CodeGen/AMDGPU/sminmax.ll
>
> diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
> index 1af08a8..d7904b0 100644
> --- a/lib/Target/AMDGPU/SIInstrInfo.cpp
> +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
> @@ -2290,6 +2290,31 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
>         }
>         break;
>   
> +    case AMDGPU::S_ABS_I32: {
> +      MachineBasicBlock &MBB = *Inst->getParent();
> +      MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
> +      MachineBasicBlock::iterator MII = Inst;
> +      DebugLoc DL = Inst->getDebugLoc();
> +
> +      MachineOperand &Dest = Inst->getOperand(0);
> +      MachineOperand &Src = Inst->getOperand(1);
> +      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
> +      unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
> +
> +      BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
> +        .addImm(0)
> +        .addReg(Src.getReg());

I think this could break if Src is a subregister. This probably needs 
what splitScalar64BitUnaryOp does to handle subregisters. I would also 
prefer splitting this into a separate function
> +
> +      BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
> +        .addReg(Src.getReg())
> +        .addReg(TmpReg);
> +
> +      MRI.replaceRegWith(Dest.getReg(), ResultReg);
> +      addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
> +      Inst->eraseFromParent();
> +      continue;
> +    }
> +
>       case AMDGPU::S_BFE_U64:
>       case AMDGPU::S_BFM_B64:
>         llvm_unreachable("Moving this op to VALU not implemented");
> diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
> index ed75b4d..9e0046b 100644
> --- a/lib/Target/AMDGPU/SIInstructions.td
> +++ b/lib/Target/AMDGPU/SIInstructions.td
> @@ -2177,6 +2177,11 @@ def : Pat <
>        (S_MOV_B32 0), sub1))
>   >;
>   
> +def : Pat <
> +  (i32 (smax i32:$x, (i32 (ineg i32:$x)))),
> +  (S_ABS_I32 $x)
> +>;
> +
>   //===----------------------------------------------------------------------===//
>   // SOP2 Patterns
>   //===----------------------------------------------------------------------===//
> diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
> index 8bf094b..ca8ddba 100644
> --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
> +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
> @@ -8,9 +8,7 @@ declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone
>   declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone
>   
>   ; FUNC-LABEL: {{^}}s_abs_i32:
> -; SI: s_sub_i32
> -; SI: s_max_i32
> -; SI: s_endpgm
> +; SI: s_abs_i32
>   
>   ; EG: SUB_INT
>   ; EG: MAX_INT
> diff --git a/test/CodeGen/AMDGPU/sminmax.ll b/test/CodeGen/AMDGPU/sminmax.ll
> new file mode 100644
> index 0000000..a481965
> --- /dev/null
> +++ b/test/CodeGen/AMDGPU/sminmax.ll
> @@ -0,0 +1,24 @@
> +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
> +
> +; FUNC-LABEL: {{^}}s_abs_i32:
> +; GCN: s_abs_i32
> +define void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
> +  %neg = sub i32 0, %val
> +  %cond = icmp sgt i32 %val, %neg
> +  %res = select i1 %cond, i32 %val, i32 %neg
> +  store i32 %res, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: {{^}}v_abs_i32:
> +; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
> +; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
> +define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
> +  %val = load i32, i32 addrspace(1)* %src, align 4
> +  %neg = sub i32 0, %val
> +  %cond = icmp sgt i32 %val, %neg
> +  %res = select i1 %cond, i32 %val, i32 %neg
> +  store i32 %res, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> -- 2.1.4

Can you add another testcase where the output has another scalar 
instruction use before the store, and another for v2i32/v4i32 to make 
sure subregister sources work

>
> 0004-AMDGPU-SI-handle-undef-for-llvm.SI.packf16.patch
>
>
>  From 0d0216ee98873d1c47af89d330a1af1fef9fcd18 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=<marek.olsak at amd.com>
> Date: Sun, 11 Oct 2015 21:40:37 +0200
> Subject: [PATCH 4/4] AMDGPU/SI: handle undef for llvm.SI.packf16
>
> ---
>   lib/Target/AMDGPU/SIISelLowering.cpp   |  5 +++++
>   test/CodeGen/AMDGPU/llvm.SI.packf16.ll | 29 +++++++++++++++++++++++++++++
>   2 files changed, 34 insertions(+)
>   create mode 100644 test/CodeGen/AMDGPU/llvm.SI.packf16.ll
>
> diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
> index 804b5e6..8d7a4d1 100644
> --- a/lib/Target/AMDGPU/SIISelLowering.cpp
> +++ b/lib/Target/AMDGPU/SIISelLowering.cpp
> @@ -1091,6 +1091,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
>                          DAG.getConstant(2, DL, MVT::i32), // P0
>                          Op.getOperand(1), Op.getOperand(2), Glue);
>     }
> +  case AMDGPUIntrinsic::SI_packf16:
> +    if (Op.getOperand(1).isUndef() && Op.getOperand(2).isUndef())
Should this be ||?
> +      return DAG.getUNDEF(MVT::i32);
> +    else
> +      return Op;
No return after else

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151014/21a93ac2/attachment.html>