[PATCH] D88783: [InstCombine] matchFunnelShift - fold or(shl(a,x),lshr(b,sub(bw,x))) -> fshl(a,b,x) iff x < bw

Sanjay Patel via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 8 05:40:46 PDT 2020


spatel added inline comments.


================
Comment at: llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp:2090-2091
+    if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
+      KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
+      return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+    }
----------------
lebedev.ri wrote:
> Either this needs a motivational comment, or this can be dropped.
> ```
> ----------------------------------------
> define i64 @src(i64 %x, i64 %y, i64 %a) {
> %0:
>   %mask = and i64 %a, -1
>   %shl = shl i64 %x, %mask
>   %sub = sub nsw nuw i64 64, %mask
>   %shr = lshr i64 %y, %sub
>   %r = or i64 %shl, %shr
>   ret i64 %r
> }
> =>
> define i64 @tgt(i64 %x, i64 %y, i64 %a) {
> %0:
>   %r = fshl i64 %x, i64 %y, i64 %a
>   ret i64 %r
> }
> Transformation seems to be correct!
> 
> ```
We can correctly convert to the intrinsic because the intrinsic makes guarantees about the shift amount (it is masked modulo the bitwidth) that may not be present in the original IR.

But expanding the intrinsic back out requires masking/select to guard against UB shift amounts, so the expanded code will be worse in codegen. That makes this an irreversible transform, and we can't do it universally.
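To make the asymmetry concrete, here is a minimal Python model (my sketch, not LLVM code) of the two sides: the intrinsic semantics, which mask the shift amount modulo the bitwidth and are defined for every amount, versus the raw or(shl, lshr) pattern, which in IR is only equivalent when the amount is strictly between 0 and the bitwidth (an lshr by the full width is poison):

```python
# Model of llvm.fshl.i32 vs. the or(shl, lshr) expansion.
# This is an illustrative sketch; names and constants are my own.

BW = 32
MASK = (1 << BW) - 1

def fshl(x: int, y: int, a: int) -> int:
    """Funnel shift left: top BW bits of ((x:y) << (a % BW)).
    Defined for every shift amount because it is taken modulo BW."""
    s = a % BW
    if s == 0:
        return x & MASK
    return (((x << s) & MASK) | ((y & MASK) >> (BW - s))) & MASK

def fake_fshl(x: int, y: int, a: int) -> int:
    """The or(shl, lshr) pattern from the IR above. In LLVM IR this is
    only well-defined for 0 < s < BW: when s == 0, the sub produces BW
    and the lshr by BW is poison (Python happily returns 0 instead)."""
    s = a & (BW - 1)  # the 'and i32 %a, 31' mask from the IR example
    return (((x << s) & MASK) | ((y & MASK) >> (BW - s))) & MASK

# For in-range amounts the two agree, which is why IR -> intrinsic is safe:
assert fshl(0xDEADBEEF, 0x12345678, 5) == fake_fshl(0xDEADBEEF, 0x12345678, 5)
# For s == 0 only the intrinsic has defined behavior (it returns x),
# which is why the reverse expansion needs extra masking/select.
assert fshl(0xDEADBEEF, 0x12345678, 0) == 0xDEADBEEF
```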

So even with the computeKnownBits restriction in this patch, this transform may not be allowed. For example, can we fix this in codegen:
https://alive2.llvm.org/ce/z/IsuRoN

```
$ cat fsh.ll 
define i32 @fake_fshl(i32 %x, i32 %y, i32 %a) {
  %mask = and i32 %a, 31
  %shl = shl i32 %x, %mask
  %sub = sub nuw nsw i32 32, %mask
  %shr = lshr i32 %y, %sub
  %r = or i32 %shl, %shr
  ret i32 %r
}

define i32 @fshl(i32 %x, i32 %y, i32 %a) {
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %a)
  ret i32 %r
}

declare i32 @llvm.fshl.i32(i32, i32, i32)

$ llc -o - fsh.ll -mtriple=riscv64
fake_fshl:                              # @fake_fshl
	sllw	a0, a0, a2
	addi	a3, zero, 32
	sub	a2, a3, a2
	srlw	a1, a1, a2
	or	a0, a0, a1
	ret
fshl:                                   # @fshl
	andi	a2, a2, 31
	sll	a0, a0, a2
	not	a2, a2
	slli	a1, a1, 32
	srli	a1, a1, 1
	srl	a1, a1, a2
	or	a0, a0, a1
	ret

```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88783/new/

https://reviews.llvm.org/D88783
