<html>
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
</head>
<body text="#000000" bgcolor="#FFFFFF">
<div class="moz-cite-prefix">On 10/14/2015 11:15 AM, Marek Olšák via
llvm-commits wrote:<br>
</div>
<blockquote
cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"
type="cite">
<div class="moz-text-plain" wrap="true" graphical-quote="true"
style="font-family: -moz-fixed; font-size: 12px;"
lang="x-unicode">
<pre wrap="">Please review.
Marek
</pre>
</div>
<br>
<fieldset class="mimeAttachmentHeader"><legend
class="mimeAttachmentHeaderName">0001-AMDGPU-SI-use-S_AND-for-i1-trunc.patch</legend></fieldset>
<br>
<div class="moz-text-plain" wrap="true" graphical-quote="true"
style="font-family: -moz-fixed; font-size: 12px;"
lang="x-western">
<pre wrap="">From d7b62060d11f2d1c303438b45bb7d29b8ba012ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:marek.olsak@amd.com">&lt;marek.olsak@amd.com&gt;</a>
Date: Wed, 7 Oct 2015 02:51:54 +0200
Subject: [PATCH 1/4] AMDGPU/SI: use S_AND for i1 trunc
---
lib/Target/AMDGPU/SIInstructions.td | 4 ++--
test/CodeGen/AMDGPU/trunc.ll | 8 ++++----
2 files changed, 6 insertions(+), 6 deletions(-)
</pre>
</div>
</blockquote>
LGTM<br>
<br>
<blockquote
cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"
type="cite">
<div class="moz-text-plain" wrap="true" graphical-quote="true"
style="font-family: -moz-fixed; font-size: 12px;"
lang="x-western">
<pre wrap="">
</pre>
</div>
<br>
<fieldset class="mimeAttachmentHeader"><legend
class="mimeAttachmentHeaderName">0002-AMDGPU-SI-use-S_OR-for-fneg-fabs-f32.patch</legend></fieldset>
<br>
<div class="moz-text-plain" wrap="true" graphical-quote="true"
style="font-family: -moz-fixed; font-size: 12px;"
lang="x-western">
<pre wrap="">From 17a7e3f389813d7823179c5f6f5d7794edd97650 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:marek.olsak@amd.com">&lt;marek.olsak@amd.com&gt;</a>
Date: Wed, 7 Oct 2015 03:02:42 +0200
Subject: [PATCH 2/4] AMDGPU/SI: use S_OR for fneg (fabs f32)
---
lib/Target/AMDGPU/SIInstructions.td | 3 +--
test/CodeGen/AMDGPU/fneg-fabs.ll | 27 +++++++++------------------
2 files changed, 10 insertions(+), 20 deletions(-)</pre>
</div>
</blockquote>
LGTM<br>
<br>
<pre wrap=""><div class="moz-txt-sig">
</div></pre>
<blockquote
cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"
type="cite"><br>
<fieldset class="mimeAttachmentHeader"><legend
class="mimeAttachmentHeaderName">0003-AMDGPU-SI-select-S_ABS_I32-when-possible.patch</legend></fieldset>
<br>
<div class="moz-text-plain" wrap="true" graphical-quote="true"
style="font-family: -moz-fixed; font-size: 12px;"
lang="x-western">
<pre wrap="">From 96d649f157b8e77d2313f8d2f7a1b27a15279145 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:marek.olsak@amd.com">&lt;marek.olsak@amd.com&gt;</a>
Date: Sat, 10 Oct 2015 21:23:23 +0200
Subject: [PATCH 3/4] AMDGPU/SI: select S_ABS_I32 when possible
---
lib/Target/AMDGPU/SIInstrInfo.cpp | 25 +++++++++++++++++++++++++
lib/Target/AMDGPU/SIInstructions.td | 5 +++++
test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll | 4 +---
test/CodeGen/AMDGPU/sminmax.ll | 24 ++++++++++++++++++++++++
4 files changed, 55 insertions(+), 3 deletions(-)
create mode 100644 test/CodeGen/AMDGPU/sminmax.ll
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1af08a8..d7904b0 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2290,6 +2290,31 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
break;
+ case AMDGPU::S_ABS_I32: {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src = Inst->getOperand(1);
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
+ .addImm(0)
+ .addReg(Src.getReg());</pre>
</div>
</blockquote>
<br>
I think this could break if Src is a subregister. This probably
needs to do what splitScalar64BitUnaryOp does to handle subregisters.
I would also prefer splitting this out into a separate function.<br>
<blockquote
cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"
type="cite">
<div class="moz-text-plain" wrap="true" graphical-quote="true"
style="font-family: -moz-fixed; font-size: 12px;"
lang="x-western">
<pre wrap="">
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
+ .addReg(Src.getReg())
+ .addReg(TmpReg);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
+ Inst->eraseFromParent();
+ continue;
+ }
+
case AMDGPU::S_BFE_U64:
case AMDGPU::S_BFM_B64:
llvm_unreachable("Moving this op to VALU not implemented");
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index ed75b4d..9e0046b 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -2177,6 +2177,11 @@ def : Pat <
(S_MOV_B32 0), sub1))
>;
+def : Pat <
+ (i32 (smax i32:$x, (i32 (ineg i32:$x)))),
+ (S_ABS_I32 $x)
+>;
+
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
index 8bf094b..ca8ddba 100644
--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
@@ -8,9 +8,7 @@ declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone
declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone
; FUNC-LABEL: {{^}}s_abs_i32:
-; SI: s_sub_i32
-; SI: s_max_i32
-; SI: s_endpgm
+; SI: s_abs_i32
; EG: SUB_INT
; EG: MAX_INT
diff --git a/test/CodeGen/AMDGPU/sminmax.ll b/test/CodeGen/AMDGPU/sminmax.ll
new file mode 100644
index 0000000..a481965
--- /dev/null
+++ b/test/CodeGen/AMDGPU/sminmax.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}s_abs_i32:
+; GCN: s_abs_i32
+define void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
+ %neg = sub i32 0, %val
+ %cond = icmp sgt i32 %val, %neg
+ %res = select i1 %cond, i32 %val, i32 %neg
+ store i32 %res, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_abs_i32:
+; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
+define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
+ %val = load i32, i32 addrspace(1)* %src, align 4
+ %neg = sub i32 0, %val
+ %cond = icmp sgt i32 %val, %neg
+ %res = select i1 %cond, i32 %val, i32 %neg
+ store i32 %res, i32 addrspace(1)* %out, align 4
+ ret void
+}
<div class="moz-txt-sig">--
2.1.4</div></pre>
</div>
</blockquote>
<br>
Can you add another test case where the output has another scalar
instruction use before the store, and another for v2i32/v4i32 to
make sure subregister sources work?<br>
<br>
<blockquote
cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"
type="cite">
<div class="moz-text-plain" wrap="true" graphical-quote="true"
style="font-family: -moz-fixed; font-size: 12px;"
lang="x-western">
<pre wrap=""><div class="moz-txt-sig">
</div></pre>
</div>
<br>
<fieldset class="mimeAttachmentHeader"><legend
class="mimeAttachmentHeaderName">0004-AMDGPU-SI-handle-undef-for-llvm.SI.packf16.patch</legend></fieldset>
<br>
<div class="moz-text-plain" wrap="true" graphical-quote="true"
style="font-family: -moz-fixed; font-size: 12px;"
lang="x-western">
<pre wrap="">From 0d0216ee98873d1c47af89d330a1af1fef9fcd18 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <a moz-do-not-send="true" class="moz-txt-link-rfc2396E" href="mailto:marek.olsak@amd.com">&lt;marek.olsak@amd.com&gt;</a>
Date: Sun, 11 Oct 2015 21:40:37 +0200
Subject: [PATCH 4/4] AMDGPU/SI: handle undef for llvm.SI.packf16
---
lib/Target/AMDGPU/SIISelLowering.cpp | 5 +++++
test/CodeGen/AMDGPU/llvm.SI.packf16.ll | 29 +++++++++++++++++++++++++++++
2 files changed, 34 insertions(+)
create mode 100644 test/CodeGen/AMDGPU/llvm.SI.packf16.ll
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 804b5e6..8d7a4d1 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1091,6 +1091,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(2, DL, MVT::i32), // P0
Op.getOperand(1), Op.getOperand(2), Glue);
}
+ case AMDGPUIntrinsic::SI_packf16:
+ if (Op.getOperand(1).isUndef() && Op.getOperand(2).isUndef())</pre>
</div>
</blockquote>
Should this be ||?<br>
<blockquote
cite="mid:CAAxE2A5fQxYPhaDfqFkT+KvPCWP+o1rWXWW2ZsQR8toFtH4+yw@mail.gmail.com"
type="cite">
<div class="moz-text-plain" wrap="true" graphical-quote="true"
style="font-family: -moz-fixed; font-size: 12px;"
lang="x-western">
<pre wrap="">
+ return DAG.getUNDEF(MVT::i32);
+ else
+ return Op;</pre>
</div>
</blockquote>
No else after return.<br>
<br>
</body>
</html>