[PATCH] D85364: [SVE][WIP] Implement lowering for fixed width select

Wed Aug 5 15:08:53 PDT 2020

cameron.mcinally added a comment.

For this IR test and `-aarch64-sve-vector-bits-min=512`:

  ; Reproducer module for fixed-width vector select, built for AArch64 Linux.
  target triple = "aarch64-unknown-linux-gnu"

  ; Lane-wise select over eight doubles.
  ;   %a - pointer to the first operand; the result is stored back here
  ;   %b - pointer to the second operand
  ;   %c - pointer to the <8 x i1> select mask
  define void @select(<8 x double>* %a, <8 x double>* %b, <8 x i1>* %c) #0 {
  ; CHECK: select:
    ; Load the mask and both operands; none of the loads states an alignment.
    %mask = load <8 x i1>, <8 x i1>* %c
    %op1 = load <8 x double>, <8 x double>* %a
    %op2 = load <8 x double>, <8 x double>* %b
    ; Per lane: take %op1 where the mask bit is set, otherwise %op2.
    %sel = select <8 x i1> %mask, <8 x double> %op1, <8 x double> %op2
    ; Overwrite the first operand with the result; only this store carries an
    ; explicit alignment (4 bytes).
    store <8 x double> %sel, <8 x double>* %a, align 4 
    ret void
  }       

  ; Attribute group #0 (applied to @select) enables the SVE target feature.
  attributes #0 = { "target-features"="+sve" }

this patch will generate:

          // Generated code for @select. Under AAPCS64 the three pointer
          // arguments arrive as x0 = %a, x1 = %b, x2 = %c.
          // NOTE(review): the listing writes to [sp]..[sp, #56] but shows no
          //       stack adjustment, prologue/epilogue or ret; presumably
          //       those lines were elided from this excerpt - confirm.

          // w8 = the mask byte loaded from %c; one bit per lane (bit i = lane i,
          // as shown by the shift amounts and store offsets below).
  	ldrb	w8, [x2]
          // p0 = first 8 doubleword lanes active (the fixed-width <8 x double>).
  	ptrue	p0.d, vl8
          // x9 = base of the stack buffer that receives the widened mask.
  	mov	x9, sp
          // p1 = all doubleword lanes active.
  	ptrue	p1.d
          // Shift mask bits 7, 6, 5 and 4 down to bit 0 of w10..w13.
  	lsr	w10, w8, #7
  	lsr	w11, w8, #6
  	lsr	w12, w8, #5
  	lsr	w13, w8, #4

          // Extend the mask: v8i1 -> v8i64
          // Each sbfx sign-extends bit 0 to 64 bits, giving 0 or -1 per lane;
          // each stp writes two lanes to consecutive 8-byte stack slots.
  	sbfx	x10, x10, #0, #1
  	sbfx	x11, x11, #0, #1
          // Lanes 6 and 7 -> [sp, #48] and [sp, #56].
  	stp	x11, x10, [sp, #48]
  	sbfx	x11, x12, #0, #1
  	sbfx	x12, x13, #0, #1
  	lsr	w10, w8, #3
          // Lanes 4 and 5 -> [sp, #32] and [sp, #40].
  	stp	x12, x11, [sp, #32]
  	lsr	w11, w8, #2
  	sbfx	x10, x10, #0, #1
  	sbfx	x11, x11, #0, #1
          // Lanes 2 and 3 -> [sp, #16] and [sp, #24].
  	stp	x11, x10, [sp, #16]
  	sbfx	x10, x8, #0, #1
  	lsr	w8, w8, #1
  	sbfx	x8, x8, #0, #1
          // Lanes 0 and 1 -> [sp] and [sp, #8].
  	stp	x10, x8, [sp]

          // Load extended mask into ZPR: nxv2i64
          // z0 = widened mask from the stack, z1 = op1 (from %a), z2 = op2
          // (from %b). The loads are predicated on p0 with zeroing (/z), so
          // lanes past the first 8 read as zero.
  	ld1d	{ z0.d }, p0/z, [x9]
  	ld1d	{ z1.d }, p0/z, [x0]
  	ld1d	{ z2.d }, p0/z, [x1]

          // Truncate the mask: nxv2i64 -> nxv2i1
          // Keep bit 0 of every lane, then set p2 where the lane is non-zero.
          // NOTE(review): the lanes are already 0 or -1 after the sbfx
          //       sequence, so the AND with #0x1 looks redundant before the
          //       compare against zero - confirm.
  	and	z0.d, z0.d, #0x1
  	cmpne	p2.d, p1/z, z0.d, #0

          // Combine the select mask with the fixed VL mask
          // Note: Not sure if this is *really* needed, or if we can trust the select mask,
          //       but that's a separate issue.
          // NOTE(review): z0 was loaded with p0/z, so lanes past vl8 are zero
          //       and p2 should already be false there - confirm.
  	and	p1.b, p1/z, p0.b, p2.b

          // z0 = z1 (op1) where p1 is set, otherwise z2 (op2); the result is
          // stored over %a for the lanes active in p0.
  	sel	z0.d, p1, z1.d, z2.d
  	st1d	{ z0.d }, p0, [x0]

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D85364/new/

https://reviews.llvm.org/D85364