[all-commits] [llvm/llvm-project] b0847b: AMDGPU/GlobalISel: Insert freeze when splitting ve...

Fri Nov 18 16:05:32 PST 2022

  Branch: refs/heads/main
  Home:   https://github.com/llvm/llvm-project
  Commit: b0847b0095e10e784dc241ebb19f39edd9c6a7f8
      https://github.com/llvm/llvm-project/commit/b0847b0095e10e784dc241ebb19f39edd9c6a7f8
  Author: Matt Arsenault <Matthew.Arsenault at amd.com>
  Date:   2022-11-18 (Fri, 18 Nov 2022)

  Changed paths:
    M llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    M llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir

  Log Message:
  -----------
  AMDGPU/GlobalISel: Insert freeze when splitting vector G_SEXT_INREG

This transform is broken for undef or poison inputs without a freeze.
This is also broken in lots of other places where shifts are split
into 32-bit pieces.

Amt < 32 case (Amt is the G_SEXT_INREG width; here Amt = 9, so the i64 shift amount is 64 - 9 = 55):
; Broken: https://alive2.llvm.org/ce/z/7bb4vc
; Freezing the low half of the bits makes it correct
; Fixed: https://alive2.llvm.org/ce/z/zJAZFr
; Reference (unsplit) form: shifting left and then arithmetic-shifting right by
; 55 on i64 sign-extends %val from its low 9 bits (64 - 55 = 9).
define i64 @src(i64 %val) {
  %shl = shl i64 %val, 55
  %shr = ashr i64 %shl, 55
  ret i64 %shr
}

; Split form as written here, with no freeze: the i64 result is rebuilt from
; two i32 halves derived only from the low 32 bits of %val.
; NOTE(review): %new.lo feeds both %new.hi and %zext.lo; presumably this double
; use is what lets an undef input produce inconsistent halves when no freeze
; is inserted -- confirm against the Alive2 counterexample.
define i64 @tgt(i64 %val) {
  ; Low 32 bits of the input.
  %lo32 = trunc i64 %val to i32
  ; High 32 bits of the input; %hi32 is not used by anything below.
  %shr.half = lshr i64 %val, 32
  %hi32 = trunc i64 %shr.half to i32
  ; Sign-extend the low 9 bits within 32 bits (32 - 23 = 9).
  %inreg.0 = shl i32 %lo32, 23
  %new.lo = ashr i32 %inreg.0, 23
  ; New high half: the sign bit of %new.lo replicated across all 32 bits.
  %new.hi = ashr i32 %new.lo, 31
  ; Reassemble the i64 as (%new.hi << 32) | %new.lo.
  %zext.lo = zext i32 %new.lo to i64
  %zext.hi = zext i32 %new.hi to i64
  %hi.ins = shl i64 %zext.hi, 32
  %or = or i64 %hi.ins, %zext.lo
  ret i64 %or
}

Amt == 32 case (Amt is the G_SEXT_INREG width; the i64 shift amount is 64 - 32 = 32):
Broken: https://alive2.llvm.org/ce/z/5f4qwQ
Fixed: https://alive2.llvm.org/ce/z/A2hWWF
Alive2 times out on this one; it verifies if the argument is made noundef or
if the types are scaled down to a smaller bit width.

; Reference (unsplit) form: shifting left and then arithmetic-shifting right by
; 32 on i64 sign-extends %val from its low 32 bits.
define i64 @src(i64 %val) {
  %shl = shl i64 %val, 32
  %shr = ashr i64 %shl, 32
  ret i64 %shr
}

; Split form as written here, with no freeze: the low half is passed through
; unchanged and the high half is filled with its sign bit.
; NOTE(review): %lo32 feeds both %new.hi and %zext.lo; presumably this double
; use is what lets an undef input produce inconsistent halves when no freeze
; is inserted -- confirm against the Alive2 counterexample.
define i64 @tgt(i64 %val) {
  ; Low 32 bits of the input.
  %lo32 = trunc i64 %val to i32
  ; High 32 bits of the input; %hi32 is not used by anything below.
  %shr.half = lshr i64 %val, 32
  %hi32 = trunc i64 %shr.half to i32
  ; New high half: the sign bit of %lo32 replicated across all 32 bits.
  %new.hi = ashr i32 %lo32, 31
  ; Reassemble the i64 as (%new.hi << 32) | %lo32.
  %zext.lo = zext i32 %lo32 to i64
  %zext.hi = zext i32 %new.hi to i64
  %hi.ins = shl i64 %zext.hi, 32
  %or = or i64 %hi.ins, %zext.lo
  ret i64 %or
}

Amt > 32 case (Amt is the G_SEXT_INREG width; here Amt = 55, so the i64 shift amount is 64 - 55 = 9):
; Correct: https://alive2.llvm.org/ce/z/tvrhPf
; Reference (unsplit) form: shifting left and then arithmetic-shifting right by
; 9 on i64 sign-extends %val from its low 55 bits (64 - 9 = 55).
define i64 @src(i64 %val) {
  %shl = shl i64 %val, 9
  %shr = ashr i64 %shl, 9
  ret i64 %shr
}

; Split form: the low half is passed through unchanged and only the high half
; is sign-extended in place. Each half (%lo32, %hi32) is used exactly once.
define i64 @tgt(i64 %val) {
  ; Low 32 bits of the input, forwarded as-is.
  %lo32 = trunc i64 %val to i32
  ; High 32 bits of the input.
  %lshr = lshr i64 %val, 32
  %hi32 = trunc i64 %lshr to i32
  ; Sign-extend the low 23 bits of the high half (32 - 9 = 23), which is
  ; bit 55 of the full i64 (32 + 23 = 55).
  %inreg.0 = shl i32 %hi32, 9
  %new.hi = ashr i32 %inreg.0, 9
  ; Reassemble the i64 as (%new.hi << 32) | %lo32.
  %zext.lo = zext i32 %lo32 to i64
  %zext.hi = zext i32 %new.hi to i64
  %hi.ins = shl i64 %zext.hi, 32
  %or = or i64 %hi.ins, %zext.lo
  ret i64 %or
}