"""
Comprehensive unit tests for timeframe aggregation utilities.

This test suite verifies:

1. Mathematical equivalence to pandas resampling
2. Bar timestamp correctness (end vs start mode)
3. OHLCV aggregation accuracy
4. Edge cases (empty data, single data point, gaps)
5. Performance benchmarks
6. MinuteDataBuffer functionality
"""
import time
from datetime import datetime, timedelta
from typing import Dict, List, Union

import numpy as np
import pandas as pd
import pytest

# Import the utilities to test
from IncrementalTrader.utils import (
    aggregate_minute_data_to_timeframe,
    parse_timeframe_to_minutes,
    get_latest_complete_bar,
    MinuteDataBuffer,
    TimeframeError
)


class TestTimeframeParser:
    """Unit tests for timeframe-string parsing."""

    def test_valid_timeframes(self):
        """Every supported timeframe string maps to the correct minute count."""
        expected = {
            "1min": 1,
            "5min": 5,
            "15min": 15,
            "30min": 30,
            "1h": 60,
            "2h": 120,
            "4h": 240,
            "1d": 1440,
            "7d": 10080,
            "1w": 10080,
        }

        for spec, minutes in expected.items():
            parsed = parse_timeframe_to_minutes(spec)
            assert parsed == minutes, f"Failed for {spec}: expected {minutes}, got {parsed}"

    def test_case_insensitive(self):
        """Parsing ignores the case of the timeframe string."""
        for spec, minutes in (("15MIN", 15), ("1H", 60), ("1D", 1440)):
            assert parse_timeframe_to_minutes(spec) == minutes

    def test_invalid_timeframes(self):
        """Malformed or non-string timeframes raise TimeframeError."""
        bad_inputs = (
            "",
            "invalid",
            "15",       # bare number, no unit
            "min",      # unit, no number
            "0min",     # zero duration
            "-5min",    # negative duration
            "1.5h",     # fractional duration
            None,       # wrong type
            123,        # wrong type
        )

        for bad in bad_inputs:
            with pytest.raises(TimeframeError):
                parse_timeframe_to_minutes(bad)


class TestAggregation:
    """Unit tests for the core minute-to-timeframe aggregation."""

    @pytest.fixture
    def sample_minute_data(self):
        """One hour of synthetic minute bars starting at 09:00."""
        base = pd.Timestamp('2024-01-01 09:00:00')
        return [
            {
                'timestamp': base + pd.Timedelta(minutes=i),
                'open': 100.0 + i * 0.1,
                'high': 100.5 + i * 0.1,
                'low': 99.5 + i * 0.1,
                'close': 100.2 + i * 0.1,
                'volume': 1000 + i * 10,
            }
            for i in range(60)
        ]

    def test_empty_data(self):
        """Aggregating an empty list yields an empty list."""
        assert aggregate_minute_data_to_timeframe([], "15min") == []

    def test_single_data_point(self):
        """A lone minute bar cannot complete a 15-minute bar."""
        lone_bar = [{
            'timestamp': pd.Timestamp('2024-01-01 09:00:00'),
            'open': 100.0,
            'high': 101.0,
            'low': 99.0,
            'close': 100.5,
            'volume': 1000,
        }]

        assert not aggregate_minute_data_to_timeframe(lone_bar, "15min")

    def test_15min_aggregation_end_timestamps(self, sample_minute_data):
        """End-mode bars are stamped with each bar's closing time."""
        bars = aggregate_minute_data_to_timeframe(sample_minute_data, "15min", "end")

        # One hour of minutes -> four complete 15-minute bars.
        assert len(bars) == 4

        expected = pd.date_range('2024-01-01 09:15:00', periods=4, freq='15min')
        for bar, ts in zip(bars, expected):
            assert bar['timestamp'] == ts

    def test_15min_aggregation_start_timestamps(self, sample_minute_data):
        """Start-mode bars are stamped with each bar's opening time."""
        bars = aggregate_minute_data_to_timeframe(sample_minute_data, "15min", "start")

        assert len(bars) == 4

        expected = pd.date_range('2024-01-01 09:00:00', periods=4, freq='15min')
        for bar, ts in zip(bars, expected):
            assert bar['timestamp'] == ts

    def test_ohlcv_aggregation_correctness(self, sample_minute_data):
        """First bar follows open=first, high=max, low=min, close=last, volume=sum."""
        first = aggregate_minute_data_to_timeframe(sample_minute_data, "15min", "end")[0]

        # Recompute the expected extrema/sum over minutes 0-14 of the fixture.
        highs = [100.5 + i * 0.1 for i in range(15)]
        lows = [99.5 + i * 0.1 for i in range(15)]
        volumes = [1000 + i * 10 for i in range(15)]

        assert first['open'] == 100.0              # minute 0's open
        assert first['high'] == max(highs)
        assert first['low'] == min(lows)
        assert first['close'] == 100.2 + 14 * 0.1  # minute 14's close
        assert first['volume'] == sum(volumes)

    def test_pandas_equivalence(self, sample_minute_data):
        """Aggregation matches a pandas resample bar-for-bar."""
        frame = pd.DataFrame(sample_minute_data).set_index('timestamp')

        # Reference: right-labelled resample matches our "end" timestamp mode.
        reference = frame.resample('15min', label='right').agg({
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum'
        }).dropna()

        ours = aggregate_minute_data_to_timeframe(sample_minute_data, "15min", "end")

        assert len(ours) == len(reference)

        for bar, (ref_ts, ref_row) in zip(ours, reference.iterrows()):
            assert bar['timestamp'] == ref_ts
            for field in ('open', 'high', 'low', 'close', 'volume'):
                assert abs(bar[field] - ref_row[field]) < 1e-10

    def test_different_timeframes(self, sample_minute_data):
        """One hour of minutes collapses into the expected number of bars."""
        expectations = {"5min": 12, "15min": 4, "30min": 2, "1h": 1}

        for spec, count in expectations.items():
            bars = aggregate_minute_data_to_timeframe(sample_minute_data, spec)
            assert len(bars) == count, f"Failed for {spec}: expected {count}, got {len(bars)}"

    def test_invalid_data_validation(self):
        """Bad inputs are rejected with ValueError."""
        # Input must be a list of bar dicts.
        with pytest.raises(ValueError):
            aggregate_minute_data_to_timeframe("not a list", "15min")

        # Bars missing required OHLCV fields are rejected.
        partial_bar = [{'timestamp': pd.Timestamp('2024-01-01 09:00:00'), 'open': 100}]
        with pytest.raises(ValueError):
            aggregate_minute_data_to_timeframe(partial_bar, "15min")

        # The timestamp mode must be a recognized value.
        good_bar = [{
            'timestamp': pd.Timestamp('2024-01-01 09:00:00'),
            'open': 100, 'high': 101, 'low': 99, 'close': 100.5, 'volume': 1000
        }]
        with pytest.raises(ValueError):
            aggregate_minute_data_to_timeframe(good_bar, "15min", "invalid_mode")


class TestLatestCompleteBar:
    """Unit tests for get_latest_complete_bar."""

    @pytest.fixture
    def sample_data_with_incomplete(self):
        """17 minutes of data: one full 15-minute bar plus a partial one."""
        base = pd.Timestamp('2024-01-01 09:00:00')
        return [
            {
                'timestamp': base + pd.Timedelta(minutes=i),
                'open': 100.0 + i * 0.1,
                'high': 100.5 + i * 0.1,
                'low': 99.5 + i * 0.1,
                'close': 100.2 + i * 0.1,
                'volume': 1000 + i * 10,
            }
            for i in range(17)
        ]

    def test_latest_complete_bar_end_mode(self, sample_data_with_incomplete):
        """End mode reports the closing timestamp of the finished bar."""
        bar = get_latest_complete_bar(sample_data_with_incomplete, "15min", "end")

        assert bar is not None
        assert bar['timestamp'] == pd.Timestamp('2024-01-01 09:15:00')

    def test_latest_complete_bar_start_mode(self, sample_data_with_incomplete):
        """Start mode reports the opening timestamp of the finished bar."""
        bar = get_latest_complete_bar(sample_data_with_incomplete, "15min", "start")

        assert bar is not None
        assert bar['timestamp'] == pd.Timestamp('2024-01-01 09:00:00')

    def test_no_complete_bars(self):
        """Fewer minutes than the timeframe yields no bar at all."""
        base = pd.Timestamp('2024-01-01 09:00:00')

        # Only 5 minutes of data against a 15-minute timeframe.
        short_window = [
            {
                'timestamp': base + pd.Timedelta(minutes=i),
                'open': 100.0,
                'high': 101.0,
                'low': 99.0,
                'close': 100.5,
                'volume': 1000,
            }
            for i in range(5)
        ]

        assert get_latest_complete_bar(short_window, "15min") is None

    def test_empty_data(self):
        """An empty series has no latest complete bar."""
        assert get_latest_complete_bar([], "15min") is None


class TestMinuteDataBuffer:
    """Unit tests for the MinuteDataBuffer ring buffer."""

    def test_buffer_initialization(self):
        """A freshly built buffer is empty and reports no time range."""
        fresh = MinuteDataBuffer(max_size=100)

        assert fresh.max_size == 100
        assert fresh.size() == 0
        assert not fresh.is_full()
        assert fresh.get_time_range() is None

    def test_invalid_initialization(self):
        """Non-positive max_size values are rejected."""
        for bad_size in (0, -10):
            with pytest.raises(ValueError):
                MinuteDataBuffer(max_size=bad_size)

    def test_add_data(self):
        """A single add is reflected in size and time range."""
        buf = MinuteDataBuffer(max_size=10)
        when = pd.Timestamp('2024-01-01 09:00:00')

        buf.add(when, {'open': 100, 'high': 101, 'low': 99, 'close': 100.5, 'volume': 1000})

        assert buf.size() == 1
        assert not buf.is_full()
        # A one-entry buffer spans a degenerate (when, when) range.
        assert buf.get_time_range() == (when, when)

    def test_buffer_overflow(self):
        """Oldest entries are evicted once max_size is exceeded."""
        buf = MinuteDataBuffer(max_size=3)
        base = pd.Timestamp('2024-01-01 09:00:00')

        for offset in range(5):
            buf.add(base + pd.Timedelta(minutes=offset),
                    {'open': 100, 'high': 101, 'low': 99, 'close': 100.5, 'volume': 1000})

        # Only the newest three entries survive.
        assert buf.size() == 3
        assert buf.is_full()

        # Remaining data covers minutes 2, 3, 4.
        assert buf.get_time_range() == (
            pd.Timestamp('2024-01-01 09:02:00'),
            pd.Timestamp('2024-01-01 09:04:00'),
        )

    def test_get_data_with_lookback(self):
        """Lookback limits the returned window to the newest minutes."""
        buf = MinuteDataBuffer(max_size=10)
        base = pd.Timestamp('2024-01-01 09:00:00')

        for offset in range(5):
            buf.add(base + pd.Timedelta(minutes=offset),
                    {'open': 100 + offset, 'high': 101 + offset,
                     'low': 99 + offset, 'close': 100.5 + offset, 'volume': 1000})

        # Last 3 minutes -> opens from minutes 2, 3, 4.
        recent = buf.get_data(lookback_minutes=3)
        assert [row['open'] for row in recent] == [102, 103, 104]

        # No lookback -> everything that was added.
        assert len(buf.get_data()) == 5

    def test_aggregate_to_timeframe(self):
        """Buffer contents aggregate into complete 15-minute bars."""
        buf = MinuteDataBuffer(max_size=100)
        base = pd.Timestamp('2024-01-01 09:00:00')

        for offset in range(30):
            buf.add(base + pd.Timedelta(minutes=offset), {
                'open': 100.0 + offset * 0.1,
                'high': 100.5 + offset * 0.1,
                'low': 99.5 + offset * 0.1,
                'close': 100.2 + offset * 0.1,
                'volume': 1000 + offset * 10,
            })

        # 30 minutes of data -> two complete 15-minute bars.
        assert len(buf.aggregate_to_timeframe("15min")) == 2

        # lookback_bars caps how many bars come back.
        assert len(buf.aggregate_to_timeframe("15min", lookback_bars=1)) == 1

    def test_get_latest_complete_bar(self):
        """The newest finished bar is returned even with a partial tail."""
        buf = MinuteDataBuffer(max_size=100)
        base = pd.Timestamp('2024-01-01 09:00:00')

        # One complete 15-minute bar plus two minutes of the next one.
        for offset in range(17):
            buf.add(base + pd.Timedelta(minutes=offset), {
                'open': 100.0 + offset * 0.1,
                'high': 100.5 + offset * 0.1,
                'low': 99.5 + offset * 0.1,
                'close': 100.2 + offset * 0.1,
                'volume': 1000 + offset * 10,
            })

        newest = buf.get_latest_complete_bar("15min")
        assert newest is not None
        assert newest['timestamp'] == pd.Timestamp('2024-01-01 09:15:00')

    def test_invalid_data_validation(self):
        """Malformed payloads and bad lookbacks raise ValueError."""
        buf = MinuteDataBuffer(max_size=10)
        when = pd.Timestamp('2024-01-01 09:00:00')

        # Incomplete OHLCV payload (missing low, close, volume).
        with pytest.raises(ValueError):
            buf.add(when, {'open': 100, 'high': 101})

        # Non-numeric field value.
        with pytest.raises(ValueError):
            buf.add(when, {'open': 'invalid', 'high': 101, 'low': 99, 'close': 100.5, 'volume': 1000})

        # Lookback must be positive.
        buf.add(when, {'open': 100, 'high': 101, 'low': 99, 'close': 100.5, 'volume': 1000})
        with pytest.raises(ValueError):
            buf.get_data(lookback_minutes=0)

    def test_clear_buffer(self):
        """clear() empties the buffer and resets the time range."""
        buf = MinuteDataBuffer(max_size=10)

        buf.add(pd.Timestamp('2024-01-01 09:00:00'),
                {'open': 100, 'high': 101, 'low': 99, 'close': 100.5, 'volume': 1000})
        assert buf.size() == 1

        buf.clear()

        assert buf.size() == 0
        assert buf.get_time_range() is None

    def test_buffer_repr(self):
        """repr() reports size and, when populated, the covered time range."""
        buf = MinuteDataBuffer(max_size=10)

        # Empty buffer advertises itself as such.
        empty_text = repr(buf)
        assert "size=0" in empty_text
        assert "empty" in empty_text

        buf.add(pd.Timestamp('2024-01-01 09:00:00'),
                {'open': 100, 'high': 101, 'low': 99, 'close': 100.5, 'volume': 1000})

        filled_text = repr(buf)
        assert "size=1" in filled_text
        assert "2024-01-01 09:00:00" in filled_text


class TestPerformance:
    """Performance benchmarks for the utilities.

    Timing uses ``time.perf_counter()`` — a monotonic, high-resolution
    clock — instead of ``time.time()``, whose wall-clock value can jump
    (NTP adjustments, DST) and skew a benchmark.
    """

    def test_aggregation_performance(self):
        """Aggregating a week of minute data completes in under a second."""
        # Create a large dataset: 1 week of minute bars with random noise.
        # NOTE: the original code reused the name `start_time` for both the
        # first bar's timestamp and the benchmark clock; distinct names
        # (`base` vs `tic`) remove that shadowing.
        base = pd.Timestamp('2024-01-01 00:00:00')
        large_data = [
            {
                'timestamp': base + pd.Timedelta(minutes=i),
                'open': 100.0 + np.random.randn() * 0.1,
                'high': 100.5 + np.random.randn() * 0.1,
                'low': 99.5 + np.random.randn() * 0.1,
                'close': 100.2 + np.random.randn() * 0.1,
                'volume': 1000 + np.random.randint(0, 500),
            }
            for i in range(7 * 24 * 60)  # one week of minutes
        ]

        # Time the aggregation with a monotonic clock.
        tic = time.perf_counter()
        result = aggregate_minute_data_to_timeframe(large_data, "15min")
        aggregation_time = time.perf_counter() - tic

        # Should complete within reasonable time (< 1 second for 1 week of data).
        assert aggregation_time < 1.0, f"Aggregation took too long: {aggregation_time:.3f}s"

        # 7 days * 24 hours * 4 fifteen-minute bars per hour.
        expected_bars = 7 * 24 * 4
        assert len(result) == expected_bars

    def test_buffer_performance(self):
        """Buffer additions and aggregation stay fast for an hour of data."""
        buffer = MinuteDataBuffer(max_size=1440)  # 24 hours of minutes

        # Time adding 1 hour of data.
        tic = time.perf_counter()
        for i in range(60):
            buffer.add(
                pd.Timestamp('2024-01-01 09:00:00') + pd.Timedelta(minutes=i),
                {
                    'open': 100.0 + i * 0.1,
                    'high': 100.5 + i * 0.1,
                    'low': 99.5 + i * 0.1,
                    'close': 100.2 + i * 0.1,
                    'volume': 1000 + i * 10,
                },
            )
        add_time = time.perf_counter() - tic

        # Should be very fast (< 0.1 seconds for 60 additions).
        assert add_time < 0.1, f"Buffer additions took too long: {add_time:.3f}s"

        # Time the aggregation itself.
        tic = time.perf_counter()
        buffer.aggregate_to_timeframe("15min")
        agg_time = time.perf_counter() - tic

        # Should be fast (< 0.01 seconds).
        assert agg_time < 0.01, f"Buffer aggregation took too long: {agg_time:.3f}s"


if __name__ == "__main__":
    # Allow running this test module directly (outside a pytest invocation):
    # delegate to pytest with verbose output for this file only.
    pytest.main([__file__, "-v"])