<html>
  <head>
    <meta content="text/html; charset=utf-8" http-equiv="Content-Type">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <div class="moz-cite-prefix">On 10/14/2015 11:15 AM, Marek Olšák via
      llvm-commits wrote:<br>
    </div>
    <blockquote
cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"
      type="cite">
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-unicode">
        <pre wrap="">Please review.

Marek
</pre>
      </div>
      <br>
      <fieldset class="mimeAttachmentHeader"><legend
          class="mimeAttachmentHeaderName">0001-AMDGPU-SI-use-S_AND-for-i1-trunc.patch</legend></fieldset>
      <br>
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">From d7b62060d11f2d1c303438b45bb7d29b8ba012ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:marek.olsak@amd.com">&lt;marek.olsak@amd.com&gt;</a>
Date: Wed, 7 Oct 2015 02:51:54 +0200
Subject: [PATCH 1/4] AMDGPU/SI: use S_AND for i1 trunc

---
 lib/Target/AMDGPU/SIInstructions.td | 4 ++--
 test/CodeGen/AMDGPU/trunc.ll        | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

</pre>
      </div>
    </blockquote>
    LGTM<br>
    <br>
    <blockquote
cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"
      type="cite">
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">
</pre>
      </div>
      <br>
      <fieldset class="mimeAttachmentHeader"><legend
          class="mimeAttachmentHeaderName">0002-AMDGPU-SI-use-S_OR-for-fneg-fabs-f32.patch</legend></fieldset>
      <br>
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">From 17a7e3f389813d7823179c5f6f5d7794edd97650 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:marek.olsak@amd.com">&lt;marek.olsak@amd.com&gt;</a>
Date: Wed, 7 Oct 2015 03:02:42 +0200
Subject: [PATCH 2/4] AMDGPU/SI: use S_OR for fneg (fabs f32)

---
 lib/Target/AMDGPU/SIInstructions.td |  3 +--
 test/CodeGen/AMDGPU/fneg-fabs.ll    | 27 +++++++++------------------
 2 files changed, 10 insertions(+), 20 deletions(-)</pre>
      </div>
    </blockquote>
    LGTM<br>
    <br>
    <pre wrap=""><div class="moz-txt-sig">

</div></pre>
    <blockquote
cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"
      type="cite"><br>
      <fieldset class="mimeAttachmentHeader"><legend
          class="mimeAttachmentHeaderName">0003-AMDGPU-SI-select-S_ABS_I32-when-possible.patch</legend></fieldset>
      <br>
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">From 96d649f157b8e77d2313f8d2f7a1b27a15279145 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:marek.olsak@amd.com">&lt;marek.olsak@amd.com&gt;</a>
Date: Sat, 10 Oct 2015 21:23:23 +0200
Subject: [PATCH 3/4] AMDGPU/SI: select S_ABS_I32 when possible

---
 lib/Target/AMDGPU/SIInstrInfo.cpp      | 25 +++++++++++++++++++++++++
 lib/Target/AMDGPU/SIInstructions.td    |  5 +++++
 test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll |  4 +---
 test/CodeGen/AMDGPU/sminmax.ll         | 24 ++++++++++++++++++++++++
 4 files changed, 55 insertions(+), 3 deletions(-)
 create mode 100644 test/CodeGen/AMDGPU/sminmax.ll

diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1af08a8..d7904b0 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2290,6 +2290,31 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
       }
       break;
 
+    case AMDGPU::S_ABS_I32: {
+      MachineBasicBlock &MBB = *Inst->getParent();
+      MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+      MachineBasicBlock::iterator MII = Inst;
+      DebugLoc DL = Inst->getDebugLoc();
+
+      MachineOperand &Dest = Inst->getOperand(0);
+      MachineOperand &Src = Inst->getOperand(1);
+      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+      BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
+        .addImm(0)
+        .addReg(Src.getReg());</pre>
      </div>
    </blockquote>
    <br>
    I think this could break if Src is a subregister. This probably
    needs to do what splitScalar64BitUnaryOp does to handle
    subregisters. I would also prefer splitting this into a separate
    function.<br>
    <blockquote
cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"
      type="cite">
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">
+
+      BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
+        .addReg(Src.getReg())
+        .addReg(TmpReg);
+
+      MRI.replaceRegWith(Dest.getReg(), ResultReg);
+      addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
+      Inst->eraseFromParent();
+      continue;
+    }
+
     case AMDGPU::S_BFE_U64:
     case AMDGPU::S_BFM_B64:
       llvm_unreachable("Moving this op to VALU not implemented");
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index ed75b4d..9e0046b 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -2177,6 +2177,11 @@ def : Pat <
      (S_MOV_B32 0), sub1))
 >;
 
+def : Pat <
+  (i32 (smax i32:$x, (i32 (ineg i32:$x)))),
+  (S_ABS_I32 $x)
+>;
+
 //===----------------------------------------------------------------------===//
 // SOP2 Patterns
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
index 8bf094b..ca8ddba 100644
--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
@@ -8,9 +8,7 @@ declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone
 declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone
 
 ; FUNC-LABEL: {{^}}s_abs_i32:
-; SI: s_sub_i32
-; SI: s_max_i32
-; SI: s_endpgm
+; SI: s_abs_i32
 
 ; EG: SUB_INT
 ; EG: MAX_INT
diff --git a/test/CodeGen/AMDGPU/sminmax.ll b/test/CodeGen/AMDGPU/sminmax.ll
new file mode 100644
index 0000000..a481965
--- /dev/null
+++ b/test/CodeGen/AMDGPU/sminmax.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}s_abs_i32:
+; GCN: s_abs_i32
+define void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
+  %neg = sub i32 0, %val
+  %cond = icmp sgt i32 %val, %neg
+  %res = select i1 %cond, i32 %val, i32 %neg
+  store i32 %res, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_abs_i32:
+; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
+define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
+  %val = load i32, i32 addrspace(1)* %src, align 4
+  %neg = sub i32 0, %val
+  %cond = icmp sgt i32 %val, %neg
+  %res = select i1 %cond, i32 %val, i32 %neg
+  store i32 %res, i32 addrspace(1)* %out, align 4
+  ret void
+}
<div class="moz-txt-sig">-- 
2.1.4</div></pre>
      </div>
    </blockquote>
    <br>
    Can you add another testcase where the output is used by another
    scalar instruction before the store, and another for v2i32/v4i32 to
    make sure subregister sources work?<br>
    <br>
    <blockquote
cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"
      type="cite">
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap=""><div class="moz-txt-sig">

</div></pre>
      </div>
      <br>
      <fieldset class="mimeAttachmentHeader"><legend
          class="mimeAttachmentHeaderName">0004-AMDGPU-SI-handle-undef-for-llvm.SI.packf16.patch</legend></fieldset>
      <br>
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">From 0d0216ee98873d1c47af89d330a1af1fef9fcd18 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:marek.olsak@amd.com">&lt;marek.olsak@amd.com&gt;</a>
Date: Sun, 11 Oct 2015 21:40:37 +0200
Subject: [PATCH 4/4] AMDGPU/SI: handle undef for llvm.SI.packf16

---
 lib/Target/AMDGPU/SIISelLowering.cpp   |  5 +++++
 test/CodeGen/AMDGPU/llvm.SI.packf16.ll | 29 +++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)
 create mode 100644 test/CodeGen/AMDGPU/llvm.SI.packf16.ll

diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 804b5e6..8d7a4d1 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1091,6 +1091,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                        DAG.getConstant(2, DL, MVT::i32), // P0
                        Op.getOperand(1), Op.getOperand(2), Glue);
   }
+  case AMDGPUIntrinsic::SI_packf16:
+    if (Op.getOperand(1).isUndef() && Op.getOperand(2).isUndef())</pre>
      </div>
    </blockquote>
    Should this be ||?<br>
    <blockquote
cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"
      type="cite">
      <div class="moz-text-plain" wrap="true" graphical-quote="true"
        style="font-family: -moz-fixed; font-size: 12px;"
        lang="x-western">
        <pre wrap="">
+      return DAG.getUNDEF(MVT::i32);
+    else
+      return Op;</pre>
      </div>
    </blockquote>
    No else after a return (the if branch already returns, so the
    else is unnecessary).<br>
    <br>
  </body>
</html>