[llvm] 954cf9a - [mlgo][nfc] Refactor the log_reader.py utility

Thu Feb 2 14:43:32 PST 2023

Author: Mircea Trofin
Date: 2023-02-02T14:41:55-08:00
New Revision: 954cf9a7d485b729198c0f6aff1580b83f273397

URL: https://github.com/llvm/llvm-project/commit/954cf9a7d485b729198c0f6aff1580b83f273397
DIFF: https://github.com/llvm/llvm-project/commit/954cf9a7d485b729198c0f6aff1580b83f273397.diff

LOG: [mlgo][nfc] Refactor the log_reader.py utility

Small refactoring in preparation for tests for the interactive mode.
This allows reading the header and reading a single observation as
explicit steps. The latter is particularly necessary because the exit
condition for the interactive host will be that the child process (the
compiler) has exited.

Added: 
    

Modified: 
    llvm/lib/Analysis/models/log_reader.py

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/models/log_reader.py b/llvm/lib/Analysis/models/log_reader.py
index 08342e565c0be..0fe090cb32ae8 100644

--- a/llvm/lib/Analysis/models/log_reader.py
+++ b/llvm/lib/Analysis/models/log_reader.py
@@ -4,10 +4,11 @@
 """
 import ctypes
 import dataclasses
+import io
 import json
 import math
 import sys
-import typing
+from typing import Optional
 
 _element_types = {
     'float': ctypes.c_float,
@@ -66,7 +67,7 @@ def __getitem__(self, index):
     return self._view[index]
 
 
-def read_tensor(fs: typing.BinaryIO, ts: TensorSpec) -> TensorValue:
+def read_tensor(fs: io.BufferedReader, ts: TensorSpec) -> TensorValue:
   size = math.prod(ts.shape) * ctypes.sizeof(ts.element_type)
   data = fs.read(size)
   return TensorValue(ts, data)
@@ -75,30 +76,46 @@ def read_tensor(fs: typing.BinaryIO, ts: TensorSpec) -> TensorValue:
 def pretty_print_tensor_value(tv: TensorValue):
   print(f'{tv.spec().name}: {",".join([str(v) for v in tv])}')
 
+def read_header(f: io.BufferedReader):
+  header = json.loads(f.readline())
+  tensor_specs = [TensorSpec.from_dict(ts) for ts in header['features']]
+  score_spec = TensorSpec.from_dict(
+      header['score']) if 'score' in header else None
+  advice_spec = TensorSpec.from_dict(
+      header['advice']) if 'advice' in header else None
+  return tensor_specs, score_spec, advice_spec
+
+
+def read_one_observation(context: Optional[str],
+                         event_str: str,
+                         f: io.BufferedReader,
+                         tensor_specs: list[TensorSpec],
+                         score_spec: Optional[TensorSpec]):
+  event = json.loads(event_str)
+  if 'context' in event:
+    context = event['context']
+    event = json.loads(f.readline())
+  observation_id = int(event['observation'])
+  features = []
+  for ts in tensor_specs:
+    features.append(read_tensor(f, ts))
+  f.readline()
+  score = None
+  if score_spec is not None:
+    score_header = json.loads(f.readline())
+    assert int(score_header['outcome']) == observation_id
+    score = read_tensor(f, score_spec)
+    f.readline()
+  return context, observation_id, features, score
+
 
 def read_stream(fname: str):
-  with open(fname, 'rb') as f:
-    header = json.loads(f.readline())
-    tensor_specs = [TensorSpec.from_dict(ts) for ts in header['features']]
-    score_spec = TensorSpec.from_dict(
-        header['score']) if 'score' in header else None
+  with io.BufferedReader(io.FileIO(fname, 'rb')) as f:
+    tensor_specs, score_spec, _ = read_header(f)
     context = None
     while event_str := f.readline():
-      event = json.loads(event_str)
-      if 'context' in event:
-        context = event['context']
-        continue
-      observation_id = int(event['observation'])
-      features = []
-      for ts in tensor_specs:
-        features.append(read_tensor(f, ts))
-      f.readline()
-      score = None
-      if score_spec is not None:
-        score_header = json.loads(f.readline())
-        assert int(score_header['outcome']) == observation_id
-        score = read_tensor(f, score_spec)
-        f.readline()
+      context, observation_id, features, score = read_one_observation(
+          context, event_str, f, tensor_specs, score_spec)
       yield context, observation_id, features, score