orderflow_backtest/level_parser.py

88 lines
2.7 KiB
Python
Raw Normal View History

"""Ultra-fast level parsing for strings like:
"[['110173.4', '0.0000454', '0', '4'], ['110177.1', '0', '0', '0'], ...]"
"""
2025-09-10 15:39:16 +08:00
from typing import List, Tuple, Any
2025-09-10 15:39:16 +08:00
def normalize_levels(levels: Any) -> List[List[float]]:
    """Parse ``levels`` and keep only rows with a strictly positive size.

    Args:
        levels: Level data in the form accepted by ``_fast_pairs`` --
            typically a single-quoted list-of-lists string such as
            ``"[['110173.4', '0.0000454', '0', '4'], ...]"``.

    Returns:
        A list of ``[price, size]`` lists (floats), in input order, holding
        only the rows whose size is greater than zero.
    """
    positive: List[List[float]] = []
    for price, size in _fast_pairs(levels):
        # Strictly positive only: zero-size rows are dropped here.
        if size > 0.0:
            positive.append([price, size])
    return positive
2025-09-10 15:39:16 +08:00
def parse_levels_including_zeros(levels: Any) -> List[Tuple[float, float]]:
    """Parse ``levels`` into ``(price, size)`` tuples without filtering.

    Unlike ``normalize_levels``, rows whose size is zero are kept, so callers
    can treat them as deletions.

    Args:
        levels: Level data in the form accepted by ``_fast_pairs`` --
            typically a single-quoted list-of-lists string.

    Returns:
        Whatever ``_fast_pairs`` produces for ``levels``: a list of
        ``(price, size)`` float tuples in input order.
    """
    parsed = _fast_pairs(levels)
    return parsed
2025-09-10 15:39:16 +08:00
# ----------------- internal: fast path -----------------
2025-09-10 15:39:16 +08:00
def _fast_pairs(levels: Any) -> List[Tuple[float, float]]:
    """Parse level data into ``(price, size)`` float tuples.

    Two input shapes are handled:

    * a list/tuple of rows, where each row is itself a list/tuple with at
      least two elements;
    * anything else is converted with ``str()`` and parsed as a
      list-of-lists literal such as
      ``"[['110173.4','0.0000454','0','4'],['110177.1','0','0','0'], ...]"``.
      Field values may be wrapped in single or double quotes.

    Only the first two fields of each row are used. Rows with fewer than two
    fields, or whose first two fields cannot be converted to ``float``, are
    skipped rather than raising.

    Args:
        levels: The level data (string or list/tuple of rows).

    Returns:
        A list of ``(price, size)`` tuples in input order. Empty when
        ``levels`` is falsy or its string form is shorter than 5 characters.
    """
    if not levels:
        return []

    pairs: List[Tuple[float, float]] = []

    # Already-materialised rows: convert the first two fields of each row.
    if isinstance(levels, (list, tuple)):
        for item in levels:
            if isinstance(item, (list, tuple)) and len(item) >= 2:
                try:
                    pairs.append((float(item[0]), float(item[1])))
                except (TypeError, ValueError, OverflowError):
                    # Best effort: a malformed row is skipped, not fatal.
                    continue
        return pairs

    text = str(levels).strip()
    if len(text) < 5:  # too short to contain "[[...]]"
        return []

    # Drop the outermost "[" and "]" (tolerant if either is missing).
    if text[0] == '[':
        text = text[1:]
    if text and text[-1] == ']':
        text = text[:-1]

    # Remove every quote character. Double quotes are stripped as well so a
    # JSON-style string parses instead of silently producing no rows.
    text = text.replace("'", "").replace('"', "")

    # text now looks like: [110173.4, 0.0000454, 0, 4], [110177.1, 0, 0, 0]
    # Splitting on "]," leaves each row with at most one bracket per side.
    for row in text.split("],"):
        row = row.strip()
        if row.startswith('['):
            row = row[1:]
        if row.endswith(']'):
            row = row[:-1]

        cols = row.split(',')
        if len(cols) < 2:
            continue

        try:
            # float() ignores surrounding whitespace, but strip explicitly so
            # the accepted field format does not depend on that detail.
            price = float(cols[0].strip())
            size = float(cols[1].strip())
        except ValueError:
            # Best effort: a malformed row is skipped, not fatal.
            continue
        pairs.append((price, size))

    return pairs