[llvm] [llvm][ARM]Add ARM widen strings pass (PR #107120)

Mon Sep 9 07:33:19 PDT 2024

nasherm wrote:

I've reduced this patch down to adding the pass, as well as tests, without enabling it.

With respect to performance gain, I've seen a jump of around 1% on some of our benchmarks.

I used the following (truncated) IR to show the difference in the generated assembly:
```
# example.ll
@.str = private unnamed_addr constant [10 x i8] c"123456789\00", align 1

; Function Attrs: nounwind
define hidden void @foo() #0 {
entry:
  %something = alloca [10 x i8], align 1
  %arraydecay = getelementptr inbounds [10 x i8], ptr %something, i32 0, i32 0
  %call = call ptr @strcpy(ptr %arraydecay, ptr @.str)
  %arraydecay1 = getelementptr inbounds [10 x i8], ptr %something, i32 0, i32 0
  %call2 = call i32 @bar(ptr %arraydecay1)
  ret void
```
**Optimization off**
```
$ opt example.ll -O2 -S | llc -mtriple=arm-arm-none-eabi -o -
..........
foo:
	.fnstart
@ %bb.0:                                @ %entry
	.save	{r4, lr}
	push	{r4, lr}
	.pad	#24
	sub	sp, sp, #24
	ldr	r12, .LCPI0_0
	add	r0, sp, #4
	mov	lr, r0
	ldm	r12!, {r1, r2, r3, r4}
	stm	lr!, {r1, r2, r3, r4}
	ldrb	r1, [r12]
	strb	r1, [lr]
	bl	bar
	add	sp, sp, #24
	pop	{r4, lr}
	mov	pc, lr
	.p2align	2
@ %bb.1:
.LCPI0_0:
	.long	.L.str
.Lfunc_end0:
	.size	foo, .Lfunc_end0-foo
	.fnend
                                        @ -- End function
	.type	.L.str,%object                  @ @.str
	.section	.rodata.str1.4,"aMS",%progbits,1
	.p2align	2, 0x0
.L.str:
	.asciz	"1234567891234567"
	.size	.L.str, 17

	.section	".note.GNU-stack","",%progbits
	.eabi_attribute	30, 1	@ Tag_ABI_optimization_goals
```

**Optimization on**
```
$ opt example.ll  -passes="default<O2>,arm-widen-strings" -S | llc -mtriple=arm-arm-none-eabi -o -
foo:
	.fnstart
@ %bb.0:                                @ %entry
	.save	{r4, r5, r11, lr}
	push	{r4, r5, r11, lr}
	.pad	#40
	sub	sp, sp, #40
	ldr	r12, .LCPI0_0
	add	r0, sp, #20
	mov	r2, r0
	ldm	r12, {r1, r3, r4, r5, lr}
	stm	r2, {r1, r3, r4, r5, lr}
	bl	bar
	add	sp, sp, #40
	pop	{r4, r5, r11, lr}
	mov	pc, lr
	.p2align	2
@ %bb.1:
.LCPI0_0:
	.long	.L.str
.Lfunc_end0:
	.size	foo, .Lfunc_end0-foo
	.fnend
                                        @ -- End function
	.type	.L__unnamed_1,%object           @ @0
	.section	.rodata.str1.1,"aMS",%progbits,1
.L__unnamed_1:
	.asciz	"1234567891234567"
	.size	.L__unnamed_1, 17

	.type	.L.str,%object                  @ @.str
	.section	.rodata,"a",%progbits
	.p2align	2, 0x0
.L.str:
	.asciz	"1234567891234567\000\000\000"
	.size	.L.str, 20

	.section	".note.GNU-stack","",%progbits
	.eabi_attribute	30, 1	@ Tag_ABI_optimization_goals

```

Diff of the assembly, for readability:
```
24,27c24,27
< 	.save	{r4, lr}
< 	push	{r4, lr}
< 	.pad	#24
< 	sub	sp, sp, #24
---
> 	.save	{r4, r5, r11, lr}
> 	push	{r4, r5, r11, lr}
> 	.pad	#40
> 	sub	sp, sp, #40
29,34c29,32
< 	add	r0, sp, #4
< 	mov	lr, r0
< 	ldm	r12!, {r1, r2, r3, r4}
< 	stm	lr!, {r1, r2, r3, r4}
< 	ldrb	r1, [r12]
< 	strb	r1, [lr]
---
> 	add	r0, sp, #20
> 	mov	r2, r0
> 	ldm	r12, {r1, r3, r4, r5, lr}
> 	stm	r2, {r1, r3, r4, r5, lr}
36,37c34,35
< 	add	sp, sp, #24
< 	pop	{r4, lr}
---
> 	add	sp, sp, #40
> 	pop	{r4, r5, r11, lr}
46a45,50
> 	.type	.L__unnamed_1,%object           @ @0
> 	.section	.rodata.str1.1,"aMS",%progbits,1
> .L__unnamed_1:
> 	.asciz	"1234567891234567"
> 	.size	.L__unnamed_1, 17
> 
48c52
< 	.section	.rodata.str1.4,"aMS",%progbits,1
---
> 	.section	.rodata,"a",%progbits
51,52c55,56
< 	.asciz	"1234567891234567"
< 	.size	.L.str, 17
---
> 	.asciz	"1234567891234567\000\000\000"
> 	.size	.L.str, 20

```

https://github.com/llvm/llvm-project/pull/107120