[llvm] [clang] [ValueTracking] Add dominating condition support in computeKnownBits() (PR #73662)

Sun Dec 10 19:51:58 PST 2023

yonghong-song wrote:

Hi, @nikic,
This patch caused a bpf verifier regression with one of bpf selftests. The details can be found in kernel bpf mailing list.
   https://lore.kernel.org/bpf/0ff5f011-7524-4550-89eb-bb2c89f699d6@linux.dev/

Note that bpf verification failure here does not mean that generated code is incorrect. It means llvm generates 'more complex' code and the current verifier cannot handle that.

I created a unit test like below:
```
 $ cat iters.bpf.o.i
 typedef unsigned long long __u64;

 struct bpf_iter_num {
  __u64 __opaque[1];
 } __attribute__((aligned(8)));

 extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __attribute__((weak))  __attribute__((section(".ksyms")));
 extern int *bpf_iter_num_next(struct bpf_iter_num *it) __attribute__((weak)) __attribute__((section(".ksyms")));
 extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __attribute__((weak)) __attribute__((section(".ksyms")));

 struct {
  int data[32];
  int n;
 } loop_data;

 int iter_arr_with_actual_elem_count(const void *ctx)
 {
  int i, n = loop_data.n, sum = 0;

  if (n > (sizeof(loop_data.data) / sizeof((loop_data.data)[0])))
   return 0;

  for ( struct bpf_iter_num ___it __attribute__((aligned(8), cleanup(bpf_iter_num_destroy))), *___p __attribute__((unused))  = ( bpf_iter_num_new(&___it, (0), (n)), (void)bpf_iter_num_destroy, (void *)0); ({ int *___t = bpf_iter_num_next(&___it);  (___t && ((i) = *___t, (i) >= (0) && (i) < (n))); }); ) {

   sum += loop_data.data[i];
  }

  return sum;
 }
```

The compilation flag:
  clang -O2 -mcpu=v4 iters.bpf.o.i -c --target=bpf -mllvm -print-after-all

For a llvm whose top commit is the patch, the assembly looks like
```
0000000000000000 <iter_arr_with_actual_elem_count>:
       0:       b4 07 00 00 00 00 00 00 w7 = 0x0
       1:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
       3:       61 16 80 00 00 00 00 00 r6 = *(u32 *)(r1 + 0x80)
       4:       26 06 1b 00 20 00 00 00 if w6 > 0x20 goto +0x1b <LBB0_4>
       5:       bf a8 00 00 00 00 00 00 r8 = r10
       6:       07 08 00 00 f8 ff ff ff r8 += -0x8
       7:       bf 81 00 00 00 00 00 00 r1 = r8
       8:       b4 02 00 00 00 00 00 00 w2 = 0x0
       9:       bc 63 00 00 00 00 00 00 w3 = w6
      10:       85 10 00 00 ff ff ff ff call -0x1
      11:       bf 81 00 00 00 00 00 00 r1 = r8
      12:       85 10 00 00 ff ff ff ff call -0x1
      13:       15 00 02 00 00 00 00 00 if r0 == 0x0 goto +0x2 <LBB0_3>

0000000000000070 <LBB0_2>:
      14:       81 01 00 00 00 00 00 00 r1 = *(s32 *)(r0 + 0x0)  <=== a sign extension here, upper 32bit may be 0xffffffff
      15:       ae 61 04 00 00 00 00 00 if w1 < w6 goto +0x4 <LBB0_5>  <=== refine the value range of r1/w1 depending on which branch is taken.

0000000000000080 <LBB0_3>:
      16:       bf a1 00 00 00 00 00 00 r1 = r10
      17:       07 01 00 00 f8 ff ff ff r1 += -0x8
      18:       85 10 00 00 ff ff ff ff call -0x1
      19:       05 00 0c 00 00 00 00 00 goto +0xc <LBB0_4>

00000000000000a0 <LBB0_5>:
      20:       67 01 00 00 02 00 00 00 r1 <<= 0x2 <=== reaching here from insn 15, the verifier assumes top 32bit is not zero
                                                                               <=== and verification will fail at insn 23.
      21:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
      23:       0f 12 00 00 00 00 00 00 r2 += r1
      24:       61 28 00 00 00 00 00 00 r8 = *(u32 *)(r2 + 0x0)
      25:       0c 78 00 00 00 00 00 00 w8 += w7
      26:       bf a1 00 00 00 00 00 00 r1 = r10
      27:       07 01 00 00 f8 ff ff ff r1 += -0x8
      28:       85 10 00 00 ff ff ff ff call -0x1
      29:       bc 87 00 00 00 00 00 00 w7 = w8
      30:       15 00 f1 ff 00 00 00 00 if r0 == 0x0 goto -0xf <LBB0_3>
      31:       05 00 ee ff 00 00 00 00 goto -0x12 <LBB0_2>

0000000000000100 <LBB0_4>:
      32:       bc 70 00 00 00 00 00 00 w0 = w7
      33:       95 00 00 00 00 00 00 00 exit
```
In the above, I added a few comments to show why verification failure. The more detail can be found in
  https://lore.kernel.org/bpf/0ff5f011-7524-4550-89eb-bb2c89f699d6@linux.dev/
as well.

For a llvm whose top commit is the patch below this commit, the assembly looks like
```
0000000000000000 <iter_arr_with_actual_elem_count>:
       0:       b4 07 00 00 00 00 00 00 w7 = 0x0
       1:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll
       3:       61 16 80 00 00 00 00 00 r6 = *(u32 *)(r1 + 0x80)
       4:       26 06 1c 00 20 00 00 00 if w6 > 0x20 goto +0x1c <LBB0_5>
       5:       bf a8 00 00 00 00 00 00 r8 = r10
       6:       07 08 00 00 f8 ff ff ff r8 += -0x8
       7:       bf 81 00 00 00 00 00 00 r1 = r8
       8:       b4 02 00 00 00 00 00 00 w2 = 0x0
       9:       bc 63 00 00 00 00 00 00 w3 = w6
      10:       85 10 00 00 ff ff ff ff call -0x1
      11:       bf 81 00 00 00 00 00 00 r1 = r8
      12:       85 10 00 00 ff ff ff ff call -0x1
      13:       15 00 03 00 00 00 00 00 if r0 == 0x0 goto +0x3 <LBB0_4>

0000000000000070 <LBB0_2>:
      14:       61 01 00 00 00 00 00 00 r1 = *(u32 *)(r0 + 0x0)  <=== unsigned extension, upper 32bit of r1 is 0.
      15:       c6 01 01 00 00 00 00 00 if w1 s< 0x0 goto +0x1 <LBB0_4>
      16:       ce 61 04 00 00 00 00 00 if w1 s< w6 goto +0x4 <LBB0_6>
                     <=== w1 (lower 32bit of r1) value range is determined here.

0000000000000088 <LBB0_4>:
      17:       bf a1 00 00 00 00 00 00 r1 = r10
      18:       07 01 00 00 f8 ff ff ff r1 += -0x8
      19:       85 10 00 00 ff ff ff ff call -0x1
      20:       05 00 0c 00 00 00 00 00 goto +0xc <LBB0_5>

00000000000000a8 <LBB0_6>:
      21:       67 01 00 00 02 00 00 00 r1 <<= 0x2 <=== reaching from insn 16, now we have a much smaller range of 'r1'
                                                                               <=== and verifier will be okay with insn 24 below.
      22:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
      24:       0f 12 00 00 00 00 00 00 r2 += r1
      25:       61 28 00 00 00 00 00 00 r8 = *(u32 *)(r2 + 0x0)
      26:       0c 78 00 00 00 00 00 00 w8 += w7
      27:       bf a1 00 00 00 00 00 00 r1 = r10
      28:       07 01 00 00 f8 ff ff ff r1 += -0x8
      29:       85 10 00 00 ff ff ff ff call -0x1
      30:       bc 87 00 00 00 00 00 00 w7 = w8
      31:       15 00 f1 ff 00 00 00 00 if r0 == 0x0 goto -0xf <LBB0_4>
      32:       05 00 ed ff 00 00 00 00 goto -0x13 <LBB0_2>

0000000000000108 <LBB0_5>:
      33:       bc 70 00 00 00 00 00 00 w0 = w7
      34:       95 00 00 00 00 00 00 00 exit
```

With '-mllvm -print-after-all', I am able to find why this patch caused the problem.
For with this patch, the IR eventually used sext right before BPF DAG->DAG Pattern Instruction Selection
```
...
land.end8:                                        ; preds = %land.end8.preheader, %for.body
  %call222 = phi ptr [ %call2, %for.body ], [ %call219, %land.end8.preheader ]
  %sum.021 = phi i32 [ %add, %for.body ], [ 0, %land.end8.preheader ]
  %1 = load i32, ptr %call222, align 4, !tbaa !8
  %idxprom = sext i32 %1 to i64
  %2 = icmp ult i32 %1, %0
  br i1 %2, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %land.end8, %for.body, %if.end
  %sum.0.lcssa = phi i32 [ 0, %if.end ], [ %sum.021, %land.end8 ], [ %add, %for.body ]
  call void @bpf_iter_num_destroy(ptr noundef nonnull %___it) #3
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %___it) #3
  br label %cleanup

for.body:                                         ; preds = %land.end8
  %arrayidx = getelementptr inbounds [32 x i32], ptr @loop_data, i64 0, i64 %idxprom
  %3 = load i32, ptr %arrayidx, align 4, !tbaa !8
  %add = add nsw i32 %3, %sum.021
  %call2 = call ptr @bpf_iter_num_next(ptr noundef nonnull %___it) #3
  %tobool.not = icmp eq ptr %call2, null
  br i1 %tobool.not, label %for.cond.cleanup, label %land.end8, !llvm.loop !9
```
You can see that idxprom is sign extended and the variable is used to construct the array index. The current
verifier won't be able to handle this so verification failed.

For without this patch, the IR eventually removed sext since it knows the value range at CorrelatedValuePropagationPass
```
...
land.end8:                                        ; preds = %for.cond
  %1 = load i32, ptr %call2, align 4, !tbaa !8
  %cmp3 = icmp sgt i32 %1, -1
  %cmp6 = icmp slt i32 %1, %0
  %2 = and i1 %cmp3, %cmp6 
  br i1 %2, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond, %land.end8
  call void @bpf_iter_num_destroy(ptr noundef nonnull %___it) #3
  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %___it) #3
  br label %cleanup

for.body:                                         ; preds = %land.end8
  %idxprom = zext nneg i32 %1 to i64
  %arrayidx = getelementptr inbounds [32 x i32], ptr @loop_data, i64 0, i64 %idxprom
  %3 = load i32, ptr %arrayidx, align 4, !tbaa !8
  %add = add nsw i32 %sum.0, %3
  br label %for.cond, !llvm.loop !9
...
``` 

So my question is that we would like to undo this transformation in llvm so we can please the verifier. Any suggestion on how to do this? Thanks!

cc @4ast cc @eddyz87 @anakryiko @jemarch

https://github.com/llvm/llvm-project/pull/73662