[PATCH] D86114: [SVE] Lower fixed length vXi8/vXi16 SDIV to scalable

Cameron McInally via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 20 11:13:05 PDT 2020


cameron.mcinally added a comment.

In D86114#2228977 <https://reviews.llvm.org/D86114#2228977>, @paulwalker-arm wrote:

>> That said, the custom lowering for i8/i16 doesn't have to be SVE-specific, as there's the alternative approach of using normal ISD nodes to do the widening so that only the final SDIV lowering is SVE-specific.

I tried that at first, but the resulting NEON assembly was pretty ugly. I may have made a mistake there, but if not, the output was entirely scalarized (see the listing below). The SVE-specific lowering code in the compiler is uglier, but the code it generates seems cleaner.

  Extending fixed-width vectors
  =============================
  
  // sdiv_v8i8 -- compiler-generated AArch64 (NEON + SVE) listing.
  // Overall shape: byte lanes of v0 and v1 are widened 8 -> 16 -> 32 bits
  // using shl/sshr pairs (with many round trips through GPRs), four
  // predicated 32-bit SVE divides are issued on two lanes each, and the
  // quotients are narrowed back to bytes in v0 with uzp1.
  // Presumably v0 holds the dividend and v1 the divisor (AAPCS64 argument
  // order) -- TODO confirm against the IR that produced this listing.
  // NOTE(review): only bytes 0-4 of each input are read here (zip1 consumes
  // the low four bytes, umov reads b[1]..b[4]); bytes 5-7 are never read.
  sdiv_v8i8:
  	.cfi_startproc
  	// The umov/fmov/mov-to-lane instructions interleaved below copy bytes
  	// 1-4 of v1 into the halfword lanes of v2, and bytes 1-4 of v0 into the
  	// halfword lanes of v3, one element at a time via w8-w14.
  	umov	w11, v1.b[1]
  	umov	w9, v1.b[2]
  	umov	w13, v0.b[1]
  	fmov	s2, w11
  	umov	w10, v1.b[3]
  	umov	w8, v1.b[4]
  	umov	w12, v0.b[2]
  	umov	w14, v0.b[3]
  	fmov	s3, w13
  	umov	w11, v0.b[4]
  	// zip1 places bytes 0-3 of v1 (resp. v0) in the low byte of each
  	// halfword; the shl/sshr #8 pairs that follow sign-extend that low byte
  	// to 16 bits.
  	zip1	v1.8b, v1.8b, v0.8b
  	zip1	v0.8b, v0.8b, v0.8b
  	mov	v2.h[1], w9
  	shl	v1.4h, v1.4h, #8
  	shl	v0.4h, v0.4h, #8
  	mov	v3.h[1], w12
  	mov	v2.h[2], w10
  	sshr	v1.4h, v1.4h, #8
  	sshr	v0.4h, v0.4h, #8
  	mov	v3.h[2], w14
  	mov	v2.h[3], w8
  	// From here the 16-bit lanes are bounced through GPRs again and
  	// re-inserted as 32-bit lanes. Note that v0 is rebuilt from v1's
  	// halfwords and v1 from v0's, so the two registers swap roles.
  	umov	w13, v1.h[0]
  	umov	w8, v0.h[0]
  	mov	v3.h[3], w11
  	shl	v2.4h, v2.4h, #8
  	umov	w9, v1.h[1]
  	umov	w10, v1.h[2]
  	umov	w12, v0.h[1]
  	umov	w11, v0.h[2]
  	fmov	s0, w13
  	fmov	s1, w8
  	shl	v3.4h, v3.4h, #8
  	sshr	v2.4h, v2.4h, #8
  	mov	v0.s[1], w9
  	fmov	s4, w9
  	mov	v1.s[1], w12
  	umov	w8, v2.h[1]
  	umov	w9, v2.h[2]
  	umov	w13, v2.h[0]
  	fmov	s2, w12
  	sshr	v3.4h, v3.4h, #8
  	mov	v4.s[1], w10
  	mov	v2.s[1], w11
  	umov	w12, v3.h[0]
  	// shl/sshr #16 pairs on .2s sign-extend the low 16 bits of each
  	// 32-bit lane.
  	shl	v0.2s, v0.2s, #16
  	shl	v1.2s, v1.2s, #16
  	umov	w10, v3.h[1]
  	umov	w11, v3.h[2]
  	fmov	s3, w13
  	sshr	v0.2s, v0.2s, #16
  	fmov	s5, w12
  	sshr	v1.2s, v1.2s, #16
  	// p0 enables the first two 32-bit lanes; it governs all four divides.
  	ptrue	p0.s, vl2
  	shl	v4.2s, v4.2s, #16
  	shl	v2.2s, v2.2s, #16
  	mov	v3.s[1], w8
  	fmov	s6, w8
  	sshr	v4.2s, v4.2s, #16
  	// sdivr is the reversed form: z0 = z1 / z0 on the active lanes.
  	sdivr	z0.s, p0/m, z0.s, z1.s
  	sshr	v1.2s, v2.2s, #16
  	mov	v5.s[1], w10
  	fmov	s2, w10
  	// z1 = z1 / z4 on the active lanes.
  	sdiv	z1.s, p0/m, z1.s, z4.s
  	mov	v6.s[1], w9
  	mov	v2.s[1], w11
  	shl	v3.2s, v3.2s, #16
  	shl	v4.2s, v5.2s, #16
  	shl	v5.2s, v6.2s, #16
  	shl	v2.2s, v2.2s, #16
  	sshr	v3.2s, v3.2s, #16
  	sshr	v4.2s, v4.2s, #16
  	// z3 = z4 / z3 on the active lanes (reversed form again).
  	sdivr	z3.s, p0/m, z3.s, z4.s
  	sshr	v4.2s, v5.2s, #16
  	sshr	v2.2s, v2.2s, #16
  	// z2 = z2 / z4 on the active lanes.
  	sdiv	z2.s, p0/m, z2.s, z4.s
  	// uzp1 keeps the even-numbered elements of its two sources: the first
  	// two narrow the 32-bit quotients to 16 bits, the last narrows the
  	// combined halfwords to bytes, leaving the result in v0.
  	uzp1	v2.4h, v3.4h, v2.4h
  	uzp1	v0.4h, v0.4h, v1.4h
  	uzp1	v0.8b, v0.8b, v2.8b
  	ret


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86114/new/

https://reviews.llvm.org/D86114



More information about the llvm-commits mailing list