[PATCH] D48278: [SelectionDAG] Fold redundant masking operations of shifted value
Diogo N. Sampaio via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 9 03:36:19 PDT 2018
dnsampaio marked 4 inline comments as done.
dnsampaio added a comment.
In https://reviews.llvm.org/D48278#1155201, @spatel wrote:
> This patch was reverted with https://reviews.llvm.org/rL336453 because it caused:
> https://bugs.llvm.org/show_bug.cgi?id=38084
Sorry about that.
> Beyond that, I don't understand the motivation. The patch increases the latency of a computation. Why is that beneficial? The x86 diff doesn't look like a win to me.
It reduces the number of computation operations from 3 to 2, and the number of integer constants kept for performing the masking from 2 to 1.
I don't see how it increases the latency, given that the masking and the shift are performed anyway.
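As a rough illustration (a C++ sketch of the shl test further down, not the patch itself; note 44032 == 172 << 8):
// Before the fold: two masks (two constants) plus the shift.
unsigned before(unsigned x) {
  unsigned lo = x & 172;           // first mask
  unsigned hi = (x << 8) & 44032;  // second mask; 44032 == 172 << 8
  return lo | hi;
}
// After the fold: the shift reuses the already-masked value, so a single
// mask (one constant) is enough.
unsigned after(unsigned x) {
  unsigned lo = x & 172;
  return lo | (lo << 8);
}
The AArch64 output shown below has exactly this shape: one and plus an orr with a shifted operand.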
> I don't know what the ARM/AArch output looked like before this patch. Always check in the baseline tests before making a code change, so we have that as a reference (and in case the patch is reverted, we won't lose the test coverage that motivated the code patch).
See the inline comments for the code changes.
================
Comment at: lib/CodeGen/SelectionDAG/DAGCombiner.cpp:4115
+ const SDValue &SHIFT = AND->getOperand(0);
+ if ((SHIFT.getNumOperands() != 2) || (!SHIFT.hasOneUse()))
+ return SDValue();
----------------
samparker wrote:
> Why not check that you have a supported shift here and potentially exit early?
All checks are already at the top of the function (except for the ISD::ROTL case). Merging them into a single big if statement just makes the code harder to understand and makes it harder for someone to add an exception to one of the rules. Technically speaking, one big test and a sequence of tests are the same thing.
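Purely for illustration (hypothetical checks, not the patch code), this is the style point being made:
// Hypothetical sketch: sequential early exits keep each rule on its own
// line, so an exception (say, for rotates) can be added next to the check
// it relaxes, without rewriting one merged condition.
static bool preconditionsHold(bool IsSupportedShift, bool HasOneUse,
                              bool IsRotate) {
  if (!IsSupportedShift)
    return false;
  if (!HasOneUse && !IsRotate) // a rotate-specific exception slots in here
    return false;
  return true;
}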
================
Comment at: lib/CodeGen/SelectionDAG/DAGCombiner.cpp:4180
+ N0Opcode = ISD::SRL;
+ /* fall-thru */
+ case ISD::SRL:
----------------
samparker wrote:
> use LLVM_FALLTHROUGH
I added LLVM_FALLTHROUGH, but GCC 8.1 still gives a warning, so I'm also keeping the /* fall-through */ comment:
/home/diosam01/LLVM/local/src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp: In member function ‘llvm::SDValue {anonymous}::DAGCombiner::foldRedundantShiftedMasks(llvm::SDNode*)’:
/home/diosam01/LLVM/local/src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:4180:18: warning: this statement may fall through [-Wimplicit-fallthrough=]
N0Opcode = ISD::SRL;
~~~~~~~~~^~~~~
/home/diosam01/LLVM/local/src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:4182:5: note: here
case ISD::SRL:
^~~~
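For reference, the pattern described above as a toy sketch (not the patch code), with both LLVM_FALLTHROUGH and a plain comment on the intentional fall-through:
#include "llvm/Support/Compiler.h" // provides LLVM_FALLTHROUGH

// Toy sketch only: annotate the intentional fall-through with the macro and
// keep a comment as well, as described above.
static int normalize(int Kind) {
  switch (Kind) {
  case 0:
    Kind = 1;
    LLVM_FALLTHROUGH; /* fall-through */
  case 1:
    return Kind;
  default:
    return -1;
  }
}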
================
Comment at: lib/CodeGen/SelectionDAG/DAGCombiner.cpp:4203
+ DAG.getNode(N0Opcode, DL, VT, ShiftTheAND, SHIFT.getOperand(1));
+ AddToWorklist(OtherUser);
+ return NewShift;
----------------
samparker wrote:
> I think you should be adding NewShift instead.
No, NewShift and its users are added to the worklist automatically at DAGCombiner.cpp lines 1483 and 1484. In the log you can see that a transformed node is always visited again in the same combining phase. I want to add the other user of this value, as it might now be its sole remaining user.
================
Comment at: test/CodeGen/AArch64/FoldRedundantShiftedMasking.ll:3-12
+define i32 @ror(i32 %a) {
+entry:
+ %m2 = and i32 %a, 3855
+ %shl = shl i32 %a, 24
+ %shr = lshr i32 %a, 8
+ %or = or i32 %shl, %shr
+ %m1 = and i32 %or, 251658255
----------------
** Before patch:
mov w9, #15
mov w8, #3855
movk w9, #3840, lsl #16
and w8, w0, w8
and w9, w9, w0, ror #8
orr w0, w9, w8
** After patch:
mov w8, #3855
and w8, w0, w8
orr w0, w8, w8, ror #8
================
Comment at: test/CodeGen/AArch64/FoldRedundantShiftedMasking.ll:18-26
+define i32 @shl(i16 %a) {
+entry:
+ %0 = sext i16 %a to i32
+ %1 = and i32 %0, 172
+ %2 = shl i32 %0, 8
+ %3 = and i32 %2, 44032
+ %4 = or i32 %1, %3
----------------
** Before patch:
sxth w8, w0
mov w9, #172
mov w10, #44032
and w9, w8, w9
and w8, w10, w8, lsl #8
orr w0, w9, w8
** After patch:
mov w8, #172
and w8, w0, w8
orr w0, w8, w8, lsl #8
================
Comment at: test/CodeGen/AArch64/FoldRedundantShiftedMasking.ll:18-26
+define i32 @shl(i16 %a) {
+entry:
+ %0 = sext i16 %a to i32
+ %1 = and i32 %0, 172
+ %2 = shl i32 %0, 8
+ %3 = and i32 %2, 44032
+ %4 = or i32 %1, %3
----------------
dnsampaio wrote:
> ** Before patch:
> sxth w8, w0
> mov w9, #172
> mov w10, #44032
> and w9, w8, w9
> and w8, w10, w8, lsl #8
> orr w0, w9, w8
> ** After patch:
> mov w8, #172
> and w8, w0, w8
> orr w0, w8, w8, lsl #8
** In x86-64:
* Before patch:
movswl %di, %eax
movl %eax, %ecx
andl $172, %ecx
shll $8, %eax
andl $44032, %eax # imm = 0xAC00
orl %ecx, %eax
* After patch:
andl $172, %edi
movl %edi, %eax
shll $8, %eax
leal (%rax,%rdi), %eax
================
Comment at: test/CodeGen/AArch64/FoldRedundantShiftedMasking.ll:32-40
+define i32 @lshr(i16 %a) {
+entry:
+ %0 = sext i16 %a to i32
+ %1 = and i32 %0, 44032
+ %2 = lshr i32 %0, 8
+ %3 = and i32 %2, 172
+ %4 = or i32 %1, %3
----------------
** Before patch:
sxth w8, w0
mov w9, #44032
mov w10, #172
and w9, w8, w9
and w8, w10, w8, lsr #8
orr w0, w9, w8
** After patch:
mov w8, #44032
and w8, w0, w8
orr w0, w8, w8, lsr #8
================
Comment at: test/CodeGen/AArch64/FoldRedundantShiftedMasking.ll:32-40
+define i32 @lshr(i16 %a) {
+entry:
+ %0 = sext i16 %a to i32
+ %1 = and i32 %0, 44032
+ %2 = lshr i32 %0, 8
+ %3 = and i32 %2, 172
+ %4 = or i32 %1, %3
----------------
dnsampaio wrote:
> ** Before patch:
> sxth w8, w0
> mov w9, #44032
> mov w10, #172
> and w9, w8, w9
> and w8, w10, w8, lsr #8
> orr w0, w9, w8
> ** After patch:
> mov w8, #44032
> and w8, w0, w8
> orr w0, w8, w8, lsr #8
** In x86-64
* Before patch:
movswl %di, %eax
movl %eax, %ecx
andl $44032, %ecx # imm = 0xAC00
shrl $8, %eax
andl $172, %eax
orl %ecx, %eax
* After patch:
andl $44032, %edi # imm = 0xAC00
movl %edi, %eax
shrl $8, %eax
leal (%rax,%rdi), %eax
================
Comment at: test/CodeGen/AArch64/FoldRedundantShiftedMasking.ll:46-54
+define i32 @ashr(i16 %a) {
+entry:
+ %0 = sext i16 %a to i32
+ %1 = and i32 %0, 44032
+ %2 = ashr i32 %0, 8
+ %3 = and i32 %2, 172
+ %4 = or i32 %1, %3
----------------
** Before patch:
sxth w8, w0
mov w9, #44032
mov w10, #172
and w9, w8, w9
and w8, w10, w8, lsr #8
orr w0, w9, w8
** After patch:
mov w8, #44032
and w8, w0, w8
orr w0, w8, w8, lsr #8
================
Comment at: test/CodeGen/AArch64/FoldRedundantShiftedMasking.ll:46-54
+define i32 @ashr(i16 %a) {
+entry:
+ %0 = sext i16 %a to i32
+ %1 = and i32 %0, 44032
+ %2 = ashr i32 %0, 8
+ %3 = and i32 %2, 172
+ %4 = or i32 %1, %3
----------------
dnsampaio wrote:
> ** Before patch:
> sxth w8, w0
> mov w9, #44032
> mov w10, #172
> and w9, w8, w9
> and w8, w10, w8, lsr #8
> orr w0, w9, w8
> ** After patch:
> mov w8, #44032
> and w8, w0, w8
> orr w0, w8, w8, lsr #8
** In x86-64
* Before patch:
movswl %di, %eax
movl %eax, %ecx
andl $44032, %ecx # imm = 0xAC00
shrl $8, %eax
andl $172, %eax
orl %ecx, %eax
* After patch:
andl $44032, %edi # imm = 0xAC00
movl %edi, %eax
shrl $8, %eax
leal (%rax,%rdi), %eax
================
Comment at: test/CodeGen/ARM/FoldRedundantShiftedMasking.ll:22
+; ARM-NEXT: mov pc, lr
+
+define i32 @shl(i16 %a) {
----------------
** Before patch:
mov r1, #15
mov r2, #15
orr r1, r1, #3840
orr r2, r2, #251658240
and r1, r0, r1
and r0, r2, r0, ror #8
orr r0, r0, r1
** After patch:
mov r1, #15
orr r1, r1, #3840
and r0, r0, r1
orr r0, r0, r0, ror #8
================
Comment at: test/CodeGen/ARM/FoldRedundantShiftedMasking.ll:23-34
+define i32 @shl(i16 %a) {
+entry:
+ %0 = sext i16 %a to i32
+ %1 = and i32 %0, 172
+ %2 = shl i32 %0, 8
+ %3 = and i32 %2, 44032
+ %4 = or i32 %1, %3
----------------
** Before patch:
lsl r0, r0, #16
mov r1, #172
and r0, r1, r0, asr #16
orr r0, r0, r0, lsl #8
** After patch:
and r0, r0, #172
orr r0, r0, r0, lsl #8
================
Comment at: test/CodeGen/ARM/FoldRedundantShiftedMasking.ll:36-44
+define i32 @lshr(i16 %a) {
+entry:
+ %0 = sext i16 %a to i32
+ %1 = and i32 %0, 44032
+ %2 = lshr i32 %0, 8
+ %3 = and i32 %2, 172
+ %4 = or i32 %1, %3
----------------
** Before patch:
lsl r0, r0, #16
mov r1, #44032
and r1, r1, r0, asr #16
asr r0, r0, #16
mov r2, #172
and r0, r2, r0, lsr #8
orr r0, r1, r0
** After patch:
and r0, r0, #44032
orr r0, r0, r0, lsr #8
================
Comment at: test/CodeGen/ARM/FoldRedundantShiftedMasking.ll:49-57
+define i32 @ashr(i16 %a) {
+entry:
+ %0 = sext i16 %a to i32
+ %1 = and i32 %0, 44032
+ %2 = ashr i32 %0, 8
+ %3 = and i32 %2, 172
+ %4 = or i32 %1, %3
----------------
** Before patch:
lsl r0, r0, #16
mov r1, #44032
and r1, r1, r0, asr #16
asr r0, r0, #16
mov r2, #172
and r0, r2, r0, lsr #8
orr r0, r1, r0
** After patch:
and r0, r0, #44032
orr r0, r0, r0, lsr #8
https://reviews.llvm.org/D48278