<html>

  <head>

    <meta content="text/html; charset=utf-8" http-equiv="Content-Type">

  </head>

  <body text="#000000" bgcolor="#FFFFFF">

    <div class="moz-cite-prefix">On 10/14/2015 11:15 AM, Marek Olšák via

      llvm-commits wrote:<br>

    </div>

    <blockquote

cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"

      type="cite">

      <div class="moz-text-plain" wrap="true" graphical-quote="true"

        style="font-family: -moz-fixed; font-size: 12px;"

        lang="x-unicode">

        <pre wrap="">Please review.

Marek

</pre>

      </div>

      <br>

      <fieldset class="mimeAttachmentHeader"><legend

          class="mimeAttachmentHeaderName">0001-AMDGPU-SI-use-S_AND-for-i1-trunc.patch</legend></fieldset>

      <br>

      <div class="moz-text-plain" wrap="true" graphical-quote="true"

        style="font-family: -moz-fixed; font-size: 12px;"

        lang="x-western">

        <pre wrap="">From d7b62060d11f2d1c303438b45bb7d29b8ba012ca Mon Sep 17 00:00:00 2001

From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:marek.olsak@amd.com">&lt;marek.olsak@amd.com&gt;</a>

Date: Wed, 7 Oct 2015 02:51:54 +0200

Subject: [PATCH 1/4] AMDGPU/SI: use S_AND for i1 trunc

---

 lib/Target/AMDGPU/SIInstructions.td | 4 ++--

 test/CodeGen/AMDGPU/trunc.ll        | 8 ++++----

 2 files changed, 6 insertions(+), 6 deletions(-)

</pre>

      </div>

    </blockquote>

    LGTM<br>

    <br>

    <blockquote

cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"

      type="cite">

      <div class="moz-text-plain" wrap="true" graphical-quote="true"

        style="font-family: -moz-fixed; font-size: 12px;"

        lang="x-western">

        <pre wrap="">

</pre>

      </div>

      <br>

      <fieldset class="mimeAttachmentHeader"><legend

          class="mimeAttachmentHeaderName">0002-AMDGPU-SI-use-S_OR-for-fneg-fabs-f32.patch</legend></fieldset>

      <br>

      <div class="moz-text-plain" wrap="true" graphical-quote="true"

        style="font-family: -moz-fixed; font-size: 12px;"

        lang="x-western">

        <pre wrap="">From 17a7e3f389813d7823179c5f6f5d7794edd97650 Mon Sep 17 00:00:00 2001

From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:marek.olsak@amd.com">&lt;marek.olsak@amd.com&gt;</a>

Date: Wed, 7 Oct 2015 03:02:42 +0200

Subject: [PATCH 2/4] AMDGPU/SI: use S_OR for fneg (fabs f32)

---

 lib/Target/AMDGPU/SIInstructions.td |  3 +--

 test/CodeGen/AMDGPU/fneg-fabs.ll    | 27 +++++++++------------------

 2 files changed, 10 insertions(+), 20 deletions(-)</pre>

      </div>

    </blockquote>

    LGTM<br>

    <br>

    <pre wrap=""><div class="moz-txt-sig">

</div></pre>

    <blockquote

cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"

      type="cite"><br>

      <fieldset class="mimeAttachmentHeader"><legend

          class="mimeAttachmentHeaderName">0003-AMDGPU-SI-select-S_ABS_I32-when-possible.patch</legend></fieldset>

      <br>

      <div class="moz-text-plain" wrap="true" graphical-quote="true"

        style="font-family: -moz-fixed; font-size: 12px;"

        lang="x-western">

        <pre wrap="">From 96d649f157b8e77d2313f8d2f7a1b27a15279145 Mon Sep 17 00:00:00 2001

From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:marek.olsak@amd.com">&lt;marek.olsak@amd.com&gt;</a>

Date: Sat, 10 Oct 2015 21:23:23 +0200

Subject: [PATCH 3/4] AMDGPU/SI: select S_ABS_I32 when possible

---

 lib/Target/AMDGPU/SIInstrInfo.cpp      | 25 +++++++++++++++++++++++++

 lib/Target/AMDGPU/SIInstructions.td    |  5 +++++

 test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll |  4 +---

 test/CodeGen/AMDGPU/sminmax.ll         | 24 ++++++++++++++++++++++++

 4 files changed, 55 insertions(+), 3 deletions(-)

 create mode 100644 test/CodeGen/AMDGPU/sminmax.ll

diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp

index 1af08a8..d7904b0 100644

--- a/lib/Target/AMDGPU/SIInstrInfo.cpp

+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp

@@ -2290,6 +2290,31 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {

       }

       break;

+    case AMDGPU::S_ABS_I32: {

+      MachineBasicBlock &MBB = *Inst->getParent();

+      MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

+      MachineBasicBlock::iterator MII = Inst;

+      DebugLoc DL = Inst->getDebugLoc();

+

+      MachineOperand &Dest = Inst->getOperand(0);

+      MachineOperand &Src = Inst->getOperand(1);

+      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

+      unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

+

+      BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)

+        .addImm(0)

+        .addReg(Src.getReg());</pre>

      </div>

    </blockquote>

    <br>

    I think this could break if Src is a subregister. This probably

    needs what splitScalar64BitUnaryOp does to handle subregisters. I

    would also prefer splitting this into a separate function.<br>

    <blockquote

cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"

      type="cite">

      <div class="moz-text-plain" wrap="true" graphical-quote="true"

        style="font-family: -moz-fixed; font-size: 12px;"

        lang="x-western">

        <pre wrap="">

+

+      BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)

+        .addReg(Src.getReg())

+        .addReg(TmpReg);

+

+      MRI.replaceRegWith(Dest.getReg(), ResultReg);

+      addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);

+      Inst->eraseFromParent();

+      continue;

+    }

+

     case AMDGPU::S_BFE_U64:

     case AMDGPU::S_BFM_B64:

       llvm_unreachable("Moving this op to VALU not implemented");

diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td

index ed75b4d..9e0046b 100644

--- a/lib/Target/AMDGPU/SIInstructions.td

+++ b/lib/Target/AMDGPU/SIInstructions.td

@@ -2177,6 +2177,11 @@ def : Pat <

      (S_MOV_B32 0), sub1))

 >;

+def : Pat <

+  (i32 (smax i32:$x, (i32 (ineg i32:$x)))),

+  (S_ABS_I32 $x)

+>;

+

 //===----------------------------------------------------------------------===//

 // SOP2 Patterns

 //===----------------------------------------------------------------------===//

diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll

index 8bf094b..ca8ddba 100644

--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll

+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll

@@ -8,9 +8,7 @@ declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone

 declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone

 ; FUNC-LABEL: {{^}}s_abs_i32:

-; SI: s_sub_i32

-; SI: s_max_i32

-; SI: s_endpgm

+; SI: s_abs_i32

 ; EG: SUB_INT

 ; EG: MAX_INT

diff --git a/test/CodeGen/AMDGPU/sminmax.ll b/test/CodeGen/AMDGPU/sminmax.ll

new file mode 100644

index 0000000..a481965

--- /dev/null

+++ b/test/CodeGen/AMDGPU/sminmax.ll

@@ -0,0 +1,24 @@

+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s

+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s

+

+; FUNC-LABEL: {{^}}s_abs_i32:

+; GCN: s_abs_i32

+define void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {

+  %neg = sub i32 0, %val

+  %cond = icmp sgt i32 %val, %neg

+  %res = select i1 %cond, i32 %val, i32 %neg

+  store i32 %res, i32 addrspace(1)* %out, align 4

+  ret void

+}

+

+; FUNC-LABEL: {{^}}v_abs_i32:

+; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]

+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]

+define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {

+  %val = load i32, i32 addrspace(1)* %src, align 4

+  %neg = sub i32 0, %val

+  %cond = icmp sgt i32 %val, %neg

+  %res = select i1 %cond, i32 %val, i32 %neg

+  store i32 %res, i32 addrspace(1)* %out, align 4

+  ret void

+}

<div class="moz-txt-sig">-- 

2.1.4</div></pre>

      </div>

    </blockquote>

    <br>

    Can you add another test case where the output has another scalar

    instruction use before the store, and another for v2i32/v4i32 to

    make sure subregister sources work?<br>

    <br>

    <blockquote

cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"

      type="cite">

      <div class="moz-text-plain" wrap="true" graphical-quote="true"

        style="font-family: -moz-fixed; font-size: 12px;"

        lang="x-western">

        <pre wrap=""><div class="moz-txt-sig">

</div></pre>

      </div>

      <br>

      <fieldset class="mimeAttachmentHeader"><legend

          class="mimeAttachmentHeaderName">0004-AMDGPU-SI-handle-undef-for-llvm.SI.packf16.patch</legend></fieldset>

      <br>

      <div class="moz-text-plain" wrap="true" graphical-quote="true"

        style="font-family: -moz-fixed; font-size: 12px;"

        lang="x-western">

        <pre wrap="">From 0d0216ee98873d1c47af89d330a1af1fef9fcd18 Mon Sep 17 00:00:00 2001

From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:marek.olsak@amd.com">&lt;marek.olsak@amd.com&gt;</a>

Date: Sun, 11 Oct 2015 21:40:37 +0200

Subject: [PATCH 4/4] AMDGPU/SI: handle undef for llvm.SI.packf16

---

 lib/Target/AMDGPU/SIISelLowering.cpp   |  5 +++++

 test/CodeGen/AMDGPU/llvm.SI.packf16.ll | 29 +++++++++++++++++++++++++++++

 2 files changed, 34 insertions(+)

 create mode 100644 test/CodeGen/AMDGPU/llvm.SI.packf16.ll

diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp

index 804b5e6..8d7a4d1 100644

--- a/lib/Target/AMDGPU/SIISelLowering.cpp

+++ b/lib/Target/AMDGPU/SIISelLowering.cpp

@@ -1091,6 +1091,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,

                        DAG.getConstant(2, DL, MVT::i32), // P0

                        Op.getOperand(1), Op.getOperand(2), Glue);

   }

+  case AMDGPUIntrinsic::SI_packf16:

+    if (Op.getOperand(1).isUndef() && Op.getOperand(2).isUndef())</pre>

      </div>

    </blockquote>

    Should this be ||?<br>

    <blockquote

cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"

      type="cite">

      <div class="moz-text-plain" wrap="true" graphical-quote="true"

        style="font-family: -moz-fixed; font-size: 12px;"

        lang="x-western">

        <pre wrap="">

+      return DAG.getUNDEF(MVT::i32);

+    else

+      return Op;</pre>

      </div>

    </blockquote>

    No else after return.<br>

    <br>

  </body>

</html>