[PATCH] D111530: [TargetLowering] Optimize expanded SRL/SHL fed into SETCC ne/eq 0

Thu Feb 17 06:21:34 PST 2022

spatel added a comment.

I didn't step through everything here, so I may not be seeing the entire problem.

There are 2 or more relatively simple folds that are missing from both IR and the DAG, and adding those might solve this more generally and more easily than the proposed patch.

Here are examples in Alive2:
https://alive2.llvm.org/ce/z/KNQuYm
https://alive2.llvm.org/ce/z/LKLpo3

So it might be possible to solve the motivating bug without starting from icmp/setcc -- it's really a problem of combining shifts and funnel shifts in a way that is better for analysis/codegen.

We can show a potential codegen improvement for x86 with a minimal example:

  % cat fsh.ll          
  declare i32 @llvm.fshl.i32 (i32, i32, i32)
  declare i32 @llvm.fshr.i32 (i32, i32, i32)

  define i32 @src(i32 %x, i32 %y) {
    %y5 = shl i32 %y, 5
    %fun3 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 27)
    %or2 = or i32 %fun3, %y5
    ret i32 %or2
  }

  define i32 @tgt(i32 %x, i32 %y) {
    %x5 = shl i32 %x, 5
    %rot3 = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 5)
    %or2 = or i32 %rot3, %x5
    ret i32 %or2
  }

  % llc -o - fsh.ll
  _src:
  	shldl	$5, %esi, %edi  # avoid shld if possible because it is slow on some targets
  	movl	%esi, %eax
  	shll	$5, %eax
  	orl	%edi, %eax
  	retq

  _tgt:
  	movl	%esi, %eax
  	shll	$5, %edi
  	roll	$5, %eax
  	orl	%edi, %eax
  	retq

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111530/new/

https://reviews.llvm.org/D111530