[llvm] 954cf9a - [mlgo][nfc] Refactor the log_reader.py utility
Mircea Trofin via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 2 14:43:32 PST 2023
Author: Mircea Trofin
Date: 2023-02-02T14:41:55-08:00
New Revision: 954cf9a7d485b729198c0f6aff1580b83f273397
URL: https://github.com/llvm/llvm-project/commit/954cf9a7d485b729198c0f6aff1580b83f273397
DIFF: https://github.com/llvm/llvm-project/commit/954cf9a7d485b729198c0f6aff1580b83f273397.diff
LOG: [mlgo][nfc] Refactor the log_reader.py utility
Small refactoring in preparation for tests for the interactive mode.
This allows reading the header and reading individual observations as
explicit steps. The latter is necessary in particular because the exit
condition for the interactive host will be that the child process (the
compiler) has exited.
Added:
Modified:
llvm/lib/Analysis/models/log_reader.py
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/models/log_reader.py b/llvm/lib/Analysis/models/log_reader.py
index 08342e565c0be..0fe090cb32ae8 100644
--- a/llvm/lib/Analysis/models/log_reader.py
+++ b/llvm/lib/Analysis/models/log_reader.py
@@ -4,10 +4,11 @@
"""
import ctypes
import dataclasses
+import io
import json
import math
import sys
-import typing
+from typing import Optional
_element_types = {
'float': ctypes.c_float,
@@ -66,7 +67,7 @@ def __getitem__(self, index):
return self._view[index]
-def read_tensor(fs: typing.BinaryIO, ts: TensorSpec) -> TensorValue:
+def read_tensor(fs: io.BufferedReader, ts: TensorSpec) -> TensorValue:
size = math.prod(ts.shape) * ctypes.sizeof(ts.element_type)
data = fs.read(size)
return TensorValue(ts, data)
@@ -75,30 +76,46 @@ def read_tensor(fs: typing.BinaryIO, ts: TensorSpec) -> TensorValue:
def pretty_print_tensor_value(tv: TensorValue):
print(f'{tv.spec().name}: {",".join([str(v) for v in tv])}')
+def read_header(f: io.BufferedReader):
+ header = json.loads(f.readline())
+ tensor_specs = [TensorSpec.from_dict(ts) for ts in header['features']]
+ score_spec = TensorSpec.from_dict(
+ header['score']) if 'score' in header else None
+ advice_spec = TensorSpec.from_dict(
+ header['advice']) if 'advice' in header else None
+ return tensor_specs, score_spec, advice_spec
+
+
+def read_one_observation(context: Optional[str],
+ event_str: str,
+ f: io.BufferedReader,
+ tensor_specs: list[TensorSpec],
+ score_spec: Optional[TensorSpec]):
+ event = json.loads(event_str)
+ if 'context' in event:
+ context = event['context']
+ event = json.loads(f.readline())
+ observation_id = int(event['observation'])
+ features = []
+ for ts in tensor_specs:
+ features.append(read_tensor(f, ts))
+ f.readline()
+ score = None
+ if score_spec is not None:
+ score_header = json.loads(f.readline())
+ assert int(score_header['outcome']) == observation_id
+ score = read_tensor(f, score_spec)
+ f.readline()
+ return context, observation_id, features, score
+
def read_stream(fname: str):
- with open(fname, 'rb') as f:
- header = json.loads(f.readline())
- tensor_specs = [TensorSpec.from_dict(ts) for ts in header['features']]
- score_spec = TensorSpec.from_dict(
- header['score']) if 'score' in header else None
+ with io.BufferedReader(io.FileIO(fname, 'rb')) as f:
+ tensor_specs, score_spec, _ = read_header(f)
context = None
while event_str := f.readline():
- event = json.loads(event_str)
- if 'context' in event:
- context = event['context']
- continue
- observation_id = int(event['observation'])
- features = []
- for ts in tensor_specs:
- features.append(read_tensor(f, ts))
- f.readline()
- score = None
- if score_spec is not None:
- score_header = json.loads(f.readline())
- assert int(score_header['outcome']) == observation_id
- score = read_tensor(f, score_spec)
- f.readline()
+ context, observation_id, features, score = read_one_observation(
+ context, event_str, f, tensor_specs, score_spec)
yield context, observation_id, features, score
More information about the llvm-commits
mailing list