adding metasupertrend data WIP
This commit is contained in:
parent
fcc0342fd8
commit
261ce54f37
@ -19,10 +19,10 @@ from scipy.signal import find_peaks
|
||||
from matplotlib.backends.backend_agg import FigureCanvasAgg
|
||||
from matplotlib.figure import Figure
|
||||
import matplotlib
|
||||
|
||||
from trend_detector_simple import TrendDetectorSimple
|
||||
|
||||
class BitcoinPricePredictor:
|
||||
def __init__(self, db_path, timeframe, model=None, timesteps=10, batch_size=8, learning_rate=0.001, epochs=50):
|
||||
def __init__(self, db_path, timeframe, model=None, timesteps=10, batch_size=32, learning_rate=0.001, epochs=50):
|
||||
self.db_path = db_path
|
||||
self.engine = create_engine(f'sqlite:///{self.db_path}')
|
||||
self.timesteps = timesteps
|
||||
@ -37,8 +37,9 @@ class BitcoinPricePredictor:
|
||||
self.history = None
|
||||
self.scaler = None
|
||||
self.timeframe = timeframe
|
||||
self.feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume',
|
||||
'HL_Ratio', 'SMA_7', 'SMA_21', 'Price_Change']
|
||||
self.feature_columns = ['open', 'high', 'low', 'close', 'volume',
|
||||
'hl_ratio', 'sma_7', 'sma_21', 'price_change']
|
||||
self.df = None
|
||||
|
||||
@staticmethod
|
||||
def reduce_mem_usage(df):
|
||||
@ -73,54 +74,73 @@ class BitcoinPricePredictor:
|
||||
return df
|
||||
|
||||
def add_essential_features(self, df):
|
||||
"""Add technical indicators and features to the dataframe."""
|
||||
print("Adding technical indicators and features...")
|
||||
|
||||
df = df.copy()
|
||||
|
||||
# Price ratio features
|
||||
df['HL_Ratio'] = (df['High'] / df['Low']).clip(lower=0.8, upper=1.2)
|
||||
df['hl_ratio'] = (df['high'] / df['low']).clip(lower=0.8, upper=1.2)
|
||||
|
||||
# Moving averages with different timeframes
|
||||
df['SMA_7'] = df['Close'].rolling(window=7, min_periods=1).mean()
|
||||
df['SMA_21'] = df['Close'].rolling(window=21, min_periods=1).mean()
|
||||
df['SMA_50'] = df['Close'].rolling(window=50, min_periods=1).mean()
|
||||
# Moving averages
|
||||
df['sma_7'] = df['close'].rolling(window=7, min_periods=1).mean()
|
||||
df['sma_21'] = df['close'].rolling(window=21, min_periods=1).mean()
|
||||
df['sma_50'] = df['close'].rolling(window=50, min_periods=1).mean()
|
||||
|
||||
# Exponential moving averages
|
||||
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
|
||||
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
|
||||
df['ema_12'] = df['close'].ewm(span=12, adjust=False).mean()
|
||||
df['ema_26'] = df['close'].ewm(span=26, adjust=False).mean()
|
||||
|
||||
# MACD
|
||||
df['MACD'] = df['EMA_12'] - df['EMA_26']
|
||||
df['MACD_Signal'] = df['MACD'].ewm(span=9, adjust=False).mean()
|
||||
df['macd'] = df['ema_12'] - df['ema_26']
|
||||
df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
|
||||
|
||||
# Relative Strength Index (RSI)
|
||||
delta = df['Close'].diff()
|
||||
# RSI
|
||||
delta = df['close'].diff()
|
||||
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
|
||||
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
|
||||
rs = gain / loss
|
||||
df['RSI'] = 100 - (100 / (1 + rs))
|
||||
df['rsi'] = 100 - (100 / (1 + rs))
|
||||
|
||||
# Bollinger Bands
|
||||
df['BB_Middle'] = df['Close'].rolling(window=20).mean()
|
||||
df['BB_Std'] = df['Close'].rolling(window=20).std()
|
||||
df['BB_Upper'] = df['BB_Middle'] + 2 * df['BB_Std']
|
||||
df['BB_Lower'] = df['BB_Middle'] - 2 * df['BB_Std']
|
||||
df['BB_Width'] = (df['BB_Upper'] - df['BB_Lower']) / df['BB_Middle']
|
||||
df['bb_middle'] = df['close'].rolling(window=20).mean()
|
||||
df['bb_std'] = df['close'].rolling(window=20).std()
|
||||
df['bb_upper'] = df['bb_middle'] + 2 * df['bb_std']
|
||||
df['bb_lower'] = df['bb_middle'] - 2 * df['bb_std']
|
||||
df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / df['bb_middle']
|
||||
|
||||
# Price changes at different timeframes
|
||||
df['Price_Change_1d'] = df['Close'].pct_change(periods=1).clip(lower=-0.5, upper=0.5)
|
||||
df['Price_Change_3d'] = df['Close'].pct_change(periods=3).clip(lower=-0.5, upper=0.5)
|
||||
df['Price_Change_7d'] = df['Close'].pct_change(periods=7).clip(lower=-0.5, upper=0.5)
|
||||
# Price changes
|
||||
df['price_change_1d'] = df['close'].pct_change(periods=1).clip(lower=-0.5, upper=0.5)
|
||||
df['price_change_3d'] = df['close'].pct_change(periods=3).clip(lower=-0.5, upper=0.5)
|
||||
df['price_change_7d'] = df['close'].pct_change(periods=7).clip(lower=-0.5, upper=0.5)
|
||||
|
||||
# Volatility
|
||||
df['Volatility'] = df['Close'].rolling(window=14).std() / df['Close'].rolling(window=14).mean()
|
||||
df['volatility'] = df['close'].rolling(window=14).std() / df['close'].rolling(window=14).mean()
|
||||
|
||||
# Clean up any NaN or infinite values
|
||||
# Get trend indicators from TrendDetector
|
||||
# TrendDetector already expects lowercase columns, so no need for conversion
|
||||
if 'datetime' not in df.columns:
|
||||
df['datetime'] = df.index
|
||||
|
||||
trend_detector = TrendDetectorSimple(df)
|
||||
trend_data, trend_analysis = trend_detector.detect_trends()
|
||||
|
||||
# Add supertrend signals
|
||||
for i, st in enumerate(trend_analysis['supertrend']):
|
||||
df[f'supertrend_{i+1}'] = st['results']['trend']
|
||||
|
||||
# Add meta-supertrend consensus
|
||||
meta_results = trend_detector.calculate_metasupertrend(df, trend_analysis['supertrend'])
|
||||
df['metasupertrend'] = meta_results['meta_trends']
|
||||
|
||||
# Add SMA crossover signals
|
||||
df['sma_cross'] = np.where(trend_analysis['sma']['7'] > trend_analysis['sma']['15'], 1, -1)
|
||||
|
||||
# Clean up NaN or infinite values
|
||||
df = df.fillna(0)
|
||||
df = df.replace([np.inf, -np.inf], 0)
|
||||
|
||||
# Update feature columns list
|
||||
self.feature_columns = [col for col in df.columns if col not in ['Next_Period_Return', 'Next_Period_Up']]
|
||||
# Update feature columns list - exclude non-numeric columns
|
||||
self.feature_columns = [col for col in df.columns if col not in ['next_period_return', 'next_period_up', 'datetime']]
|
||||
|
||||
print(f"Shape after adding features: {df.shape}")
|
||||
return df
|
||||
@ -135,18 +155,15 @@ class BitcoinPricePredictor:
|
||||
|
||||
def create_sequences_for_prediction(self, data):
|
||||
"""Create sequences of data for prediction without targets."""
|
||||
x = []
|
||||
for i in range(len(data) - self.timesteps):
|
||||
x.append(data[i:i + self.timesteps])
|
||||
return np.array(x, dtype=np.float32)
|
||||
return np.array([data[i:i + self.timesteps] for i in range(len(data) - self.timesteps)], dtype=np.float32)
|
||||
|
||||
def create_model(self, input_shape):
|
||||
"""Create and compile the LSTM model architecture."""
|
||||
model = Sequential([
|
||||
LSTM(64, return_sequences=True, input_shape=input_shape,
|
||||
recurrent_dropout=0.2, kernel_regularizer=tf.keras.regularizers.l1_l2(l1=1e-5, l2=1e-4)),
|
||||
recurrent_dropout=0.2, kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
|
||||
Dropout(0.3),
|
||||
LSTM(32, return_sequences=True, recurrent_dropout=0.1, kernel_regularizer=tf.keras.regularizers.l1_l2(l1=1e-5, l2=1e-4)),
|
||||
LSTM(32, return_sequences=True, recurrent_dropout=0.1, kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
|
||||
Dropout(0.2),
|
||||
LSTM(16),
|
||||
Dropout(0.2),
|
||||
@ -162,39 +179,68 @@ class BitcoinPricePredictor:
|
||||
print(model.summary())
|
||||
return model
|
||||
|
||||
def load_data(self):
|
||||
import pandas as pd
|
||||
import sqlite3
|
||||
def load_data_csv(self, file_path):
|
||||
"""Load Bitcoin price data from a CSV file."""
|
||||
try:
|
||||
# Read the CSV file
|
||||
self.df = pd.read_csv(file_path)
|
||||
|
||||
# Convert column names to lowercase
|
||||
self.df.columns = self.df.columns.str.lower()
|
||||
|
||||
# Convert timestamp to datetime
|
||||
self.df['timestamp'] = pd.to_datetime(self.df['timestamp'])
|
||||
self.df.set_index('timestamp', inplace=True)
|
||||
|
||||
if self.df is not None and not self.df.empty:
|
||||
print(f"Data loaded successfully from CSV. Shape: {self.df.shape}")
|
||||
else:
|
||||
print("Failed to load data. DataFrame is empty or None.")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error loading CSV data: {str(e)}")
|
||||
self.df = None
|
||||
|
||||
conn = sqlite3.connect(self.db_path)
|
||||
|
||||
self.df = pd.read_sql_query("SELECT * FROM bitcoin_data", conn)
|
||||
|
||||
if self.df is not None and not self.df.empty:
|
||||
print(f"Data loaded successfully. Shape: {self.df.shape}")
|
||||
else:
|
||||
print("Failed to load data. DataFrame is empty or None.")
|
||||
|
||||
conn.close()
|
||||
def load_data(self):
|
||||
"""Load data from SQLite database."""
|
||||
try:
|
||||
import sqlite3
|
||||
conn = sqlite3.connect(self.db_path)
|
||||
|
||||
self.df = pd.read_sql_query("SELECT * FROM bitcoin_data", conn)
|
||||
|
||||
# Convert column names to lowercase
|
||||
self.df.columns = self.df.columns.str.lower()
|
||||
|
||||
if self.df is not None and not self.df.empty:
|
||||
print(f"Data loaded successfully. Shape: {self.df.shape}")
|
||||
else:
|
||||
print("Failed to load data. DataFrame is empty or None.")
|
||||
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
print(f"Error loading database data: {str(e)}")
|
||||
self.df = None
|
||||
|
||||
def prepare_data(self):
|
||||
"""Prepare data for model training."""
|
||||
start_time = time.time()
|
||||
|
||||
#df = self.resample_data(df)
|
||||
|
||||
self.df = self.add_essential_features(self.df)
|
||||
|
||||
# Define target variable - binary classification for price movement
|
||||
self.df['Next_Period_Return'] = self.df['Close'].pct_change(periods=1).shift(-1).clip(lower=-0.5, upper=0.5)
|
||||
self.df['Next_Period_Up'] = (self.df['Next_Period_Return'] > 0).astype(np.int8)
|
||||
self.df['next_period_return'] = self.df['close'].pct_change(periods=1).shift(-1).clip(lower=-0.5, upper=0.5)
|
||||
self.df['next_period_up'] = (self.df['next_period_return'] > 0).astype(np.int8)
|
||||
self.df = self.df.dropna()
|
||||
|
||||
# Scale features
|
||||
self.scaler = RobustScaler()
|
||||
self.df[self.feature_columns] = self.scaler.fit_transform(self.df[self.feature_columns])
|
||||
# Ensure we're only scaling numeric features
|
||||
numeric_features = [col for col in self.feature_columns if col != 'datetime' and pd.api.types.is_numeric_dtype(self.df[col])]
|
||||
self.df[numeric_features] = self.scaler.fit_transform(self.df[numeric_features])
|
||||
|
||||
# Create sequences for LSTM
|
||||
x, y = self.create_sequences(self.df[self.feature_columns].values, self.df['Next_Period_Up'].values)
|
||||
x, y = self.create_sequences(self.df[numeric_features].values, self.df['next_period_up'].values)
|
||||
print(f"Sequence shape: {x.shape}, Target shape: {y.shape}")
|
||||
|
||||
# Class balance check
|
||||
@ -204,31 +250,21 @@ class BitcoinPricePredictor:
|
||||
|
||||
# Train-test split (chronological)
|
||||
split_idx = int(len(x) * 0.8)
|
||||
x_train, x_test = x[:split_idx], x[split_idx:]
|
||||
y_train, y_test = y[:split_idx], y[split_idx:]
|
||||
|
||||
# Free memory
|
||||
# del self.df
|
||||
# gc.collect()
|
||||
|
||||
self.X_train, self.X_test = x_train, x_test
|
||||
self.y_train, self.y_test = y_train, y_test
|
||||
self.X_train, self.X_test = x[:split_idx], x[split_idx:]
|
||||
self.y_train, self.y_test = y[:split_idx], y[split_idx:]
|
||||
|
||||
print(f"Training data shape: {self.X_train.shape}, Test data shape: {self.X_test.shape}")
|
||||
|
||||
class_counts = np.bincount(self.y_train.astype(int))
|
||||
print(f"Class distribution in training data: 0={class_counts[0]}, 1={class_counts[1]}")
|
||||
|
||||
print(f"Data preparation completed in {time.time() - start_time:.2f} seconds")
|
||||
|
||||
def resample_data(self, df):
|
||||
"""Resample data to specified timeframe."""
|
||||
print(f"Resampling data to {self.timeframe} timeframe...")
|
||||
df = df.resample(self.timeframe).agg({
|
||||
'Open': 'first',
|
||||
'High': 'max',
|
||||
'Low': 'min',
|
||||
'Close': 'last',
|
||||
'Volume': 'sum'
|
||||
'open': 'first',
|
||||
'high': 'max',
|
||||
'low': 'min',
|
||||
'close': 'last',
|
||||
'volume': 'sum'
|
||||
})
|
||||
print(f"Shape after resampling: {df.shape}")
|
||||
return df
|
||||
@ -236,10 +272,14 @@ class BitcoinPricePredictor:
|
||||
def load_new_data_from_model(self):
|
||||
"""Load new data and identify missing entries compared to the database."""
|
||||
new_data = pd.read_csv("./data/btcusd_1-min_data.csv")
|
||||
new_data['Timestamp'] = pd.to_datetime(new_data['Timestamp'], unit='s')
|
||||
# Convert column names to lowercase
|
||||
new_data.columns = new_data.columns.str.lower()
|
||||
new_data['timestamp'] = pd.to_datetime(new_data['timestamp'], unit='s')
|
||||
|
||||
existing_data = pd.read_sql('SELECT * FROM bitcoin_data', self.engine, index_col='Timestamp',
|
||||
parse_dates=['Timestamp'])
|
||||
existing_data = pd.read_sql('SELECT * FROM bitcoin_data', self.engine, index_col='timestamp',
|
||||
parse_dates=['timestamp'])
|
||||
# Convert column names to lowercase
|
||||
existing_data.columns = existing_data.columns.str.lower()
|
||||
|
||||
# Show the most recent entries in the database
|
||||
last_entries = existing_data.sort_index(ascending=False).head(10)
|
||||
@ -248,7 +288,7 @@ class BitcoinPricePredictor:
|
||||
|
||||
# Find missing data
|
||||
latest_timestamp = existing_data.index.max()
|
||||
missing_data = new_data[new_data['Timestamp'] > latest_timestamp]
|
||||
missing_data = new_data[new_data['timestamp'] > latest_timestamp]
|
||||
|
||||
print(f"New data total length: {len(new_data)}")
|
||||
print(f"Missing data entries: {len(missing_data)}")
|
||||
@ -271,36 +311,36 @@ class BitcoinPricePredictor:
|
||||
|
||||
return data
|
||||
|
||||
def make_predictions_w_reality(self, new_data):
|
||||
"""Make predictions and compare with actual outcomes."""
|
||||
# Ensure the 'Timestamp' column is present
|
||||
if 'Timestamp' not in new_data.columns:
|
||||
raise ValueError("Input data must contain a 'Timestamp' column.")
|
||||
def _prepare_prediction_data(self, new_data):
|
||||
"""Helper method to prepare data for prediction."""
|
||||
# Ensure the 'timestamp' column is present
|
||||
if 'timestamp' not in new_data.columns:
|
||||
raise ValueError("Input data must contain a 'timestamp' column.")
|
||||
|
||||
# Convert 'Timestamp' to datetime and set as index
|
||||
new_data['Timestamp'] = pd.to_datetime(new_data['Timestamp'], errors='coerce')
|
||||
new_data = new_data.dropna(subset=['Timestamp']) # Drop rows where Timestamp is NaT
|
||||
new_data.set_index('Timestamp', inplace=True)
|
||||
# Convert 'timestamp' to datetime and set as index
|
||||
new_data['timestamp'] = pd.to_datetime(new_data['timestamp'], errors='coerce')
|
||||
new_data = new_data.dropna(subset=['timestamp']) # Drop rows where Timestamp is NaT
|
||||
new_data.set_index('timestamp', inplace=True)
|
||||
|
||||
# Resample and aggregate data to the specified timeframe
|
||||
grouped_data = new_data.resample(self.timeframe).agg({
|
||||
'Open': 'first',
|
||||
'High': 'max',
|
||||
'Low': 'min',
|
||||
'Close': 'last',
|
||||
'Volume': 'sum'
|
||||
}).reset_index() # Reset index to preserve 'Timestamp' as a column
|
||||
'open': 'first',
|
||||
'high': 'max',
|
||||
'low': 'min',
|
||||
'close': 'last',
|
||||
'volume': 'sum'
|
||||
}).reset_index() # Reset index to preserve 'timestamp' as a column
|
||||
|
||||
if grouped_data.empty:
|
||||
print("No new data found.")
|
||||
return None, None
|
||||
return None
|
||||
|
||||
# Preprocess the data
|
||||
grouped_data = self.preprocess_data(grouped_data)
|
||||
|
||||
if grouped_data.empty:
|
||||
print("No new data after preprocessing.")
|
||||
return None, None
|
||||
return None
|
||||
|
||||
# Create sequences for the model
|
||||
X = self.create_sequences_for_prediction(grouped_data[self.feature_columns].values)
|
||||
@ -308,6 +348,19 @@ class BitcoinPricePredictor:
|
||||
if len(X) == 0:
|
||||
print("Not enough data to create sequences.")
|
||||
return None, None
|
||||
|
||||
return X, grouped_data
|
||||
|
||||
def make_predictions_w_reality(self, new_data):
|
||||
"""Make predictions and compare with actual outcomes."""
|
||||
# Convert column names to lowercase if needed
|
||||
new_data.columns = new_data.columns.str.lower()
|
||||
|
||||
prepared_data = self._prepare_prediction_data(new_data)
|
||||
if prepared_data is None:
|
||||
return None, None
|
||||
|
||||
X, grouped_data = prepared_data
|
||||
|
||||
# Generate predictions
|
||||
predictions = self.model.predict(X)
|
||||
@ -316,57 +369,30 @@ class BitcoinPricePredictor:
|
||||
grouped_data = grouped_data.iloc[self.timesteps:] # Align with sequence length
|
||||
|
||||
# Add predictions to the grouped_data DataFrame
|
||||
grouped_data['Predictions'] = (predictions > 0.5).astype(int)
|
||||
grouped_data['Prediction_Probability'] = predictions
|
||||
grouped_data['predictions'] = (predictions > 0.5).astype(int)
|
||||
grouped_data['prediction_probability'] = predictions
|
||||
|
||||
# Calculate reality (actual price movement)
|
||||
grouped_data['Reality'] = (grouped_data['Close'].pct_change() > 0.005).astype(int)
|
||||
grouped_data['reality'] = (grouped_data['close'].pct_change() > 0.005).astype(int)
|
||||
|
||||
# Calculate accuracy
|
||||
grouped_data['Correct'] = (grouped_data['Predictions'] == grouped_data['Reality']).astype(int)
|
||||
accuracy = grouped_data['Correct'].mean()
|
||||
grouped_data['correct'] = (grouped_data['predictions'] == grouped_data['reality']).astype(int)
|
||||
accuracy = grouped_data['correct'].mean()
|
||||
print(f"Prediction accuracy: {accuracy:.2f}")
|
||||
|
||||
# Return predictions and reality
|
||||
return grouped_data[['Timestamp', 'Predictions', 'Prediction_Probability']], grouped_data['Reality']
|
||||
return grouped_data[['timestamp', 'predictions', 'prediction_probability']], grouped_data['reality']
|
||||
|
||||
def make_predictions(self, new_data):
|
||||
"""Make predictions on new data."""
|
||||
# Ensure the 'Timestamp' column is present
|
||||
if 'Timestamp' not in new_data.columns:
|
||||
raise ValueError("Input data must contain a 'Timestamp' column.")
|
||||
|
||||
# Convert 'Timestamp' to datetime and set as index
|
||||
new_data['Timestamp'] = pd.to_datetime(new_data['Timestamp'], errors='coerce')
|
||||
new_data = new_data.dropna(subset=['Timestamp']) # Drop rows where Timestamp is NaT
|
||||
new_data.set_index('Timestamp', inplace=True)
|
||||
|
||||
# Resample and aggregate data to the specified timeframe
|
||||
grouped_data = new_data.resample(self.timeframe).agg({
|
||||
'Open': 'first',
|
||||
'High': 'max',
|
||||
'Low': 'min',
|
||||
'Close': 'last',
|
||||
'Volume': 'sum'
|
||||
}).reset_index() # Reset index to preserve 'Timestamp' as a column
|
||||
|
||||
if grouped_data.empty:
|
||||
print("No new data found.")
|
||||
return None
|
||||
|
||||
# Preprocess the data
|
||||
grouped_data = self.preprocess_data(grouped_data)
|
||||
|
||||
if grouped_data.empty:
|
||||
print("No new data after preprocessing.")
|
||||
return None
|
||||
|
||||
# Create sequences for the model
|
||||
X = self.create_sequences_for_prediction(grouped_data[self.feature_columns].values)
|
||||
# Convert column names to lowercase if needed
|
||||
new_data.columns = new_data.columns.str.lower()
|
||||
|
||||
if len(X) == 0:
|
||||
print("Not enough data to create sequences.")
|
||||
prepared_data = self._prepare_prediction_data(new_data)
|
||||
if prepared_data is None:
|
||||
return None
|
||||
|
||||
X, grouped_data = prepared_data
|
||||
|
||||
# Generate predictions
|
||||
predictions = self.model.predict(X)
|
||||
@ -375,11 +401,11 @@ class BitcoinPricePredictor:
|
||||
grouped_data = grouped_data.iloc[self.timesteps:]
|
||||
|
||||
# Add predictions to the grouped_data DataFrame
|
||||
grouped_data['Predictions'] = (predictions > 0.5).astype(int)
|
||||
grouped_data['Prediction_Probability'] = predictions.flatten()
|
||||
grouped_data['predictions'] = (predictions > 0.5).astype(int)
|
||||
grouped_data['prediction_probability'] = predictions.flatten()
|
||||
|
||||
# Return prediction results
|
||||
return grouped_data[['Timestamp', 'Predictions', 'Prediction_Probability']]
|
||||
return grouped_data[['timestamp', 'predictions', 'prediction_probability']]
|
||||
|
||||
def update_database(self, missing_data):
|
||||
"""Update the database with the missing data."""
|
||||
@ -393,24 +419,13 @@ class BitcoinPricePredictor:
|
||||
def train_model(self):
|
||||
"""Train the LSTM model with early stopping and checkpointing."""
|
||||
if self.X_train is None or self.y_train is None:
|
||||
raise ValueError("Data not loaded. Call load_and_prepare_data() first.")
|
||||
raise ValueError("Data not loaded. Call prepare_data() first.")
|
||||
|
||||
# Create model directory if it doesn't exist
|
||||
os.makedirs("./models", exist_ok=True)
|
||||
|
||||
# Configure TensorFlow to use memory growth
|
||||
gpus = tf.config.experimental.list_physical_devices('GPU')
|
||||
if gpus:
|
||||
try:
|
||||
# Limit TensorFlow to use only 80% of GPU memory
|
||||
for gpu in gpus:
|
||||
tf.config.experimental.set_virtual_device_configuration(
|
||||
gpu,
|
||||
[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)] # Set to 2GB or adjust as needed
|
||||
)
|
||||
print("GPU memory limit set")
|
||||
except RuntimeError as e:
|
||||
print(f"GPU memory limit setting failed: {e}")
|
||||
# Configure TensorFlow for GPU memory
|
||||
self._configure_gpu_memory()
|
||||
|
||||
# Create the model
|
||||
self.model = self.create_model(input_shape=(self.timesteps, len(self.feature_columns)))
|
||||
@ -450,6 +465,21 @@ class BitcoinPricePredictor:
|
||||
self.model.save(final_model_path)
|
||||
print(f"Model saved to {final_model_path}")
|
||||
|
||||
def _configure_gpu_memory(self):
|
||||
"""Configure TensorFlow to use GPU memory efficiently."""
|
||||
gpus = tf.config.experimental.list_physical_devices('GPU')
|
||||
if gpus:
|
||||
try:
|
||||
# Limit TensorFlow to use only 80% of GPU memory
|
||||
for gpu in gpus:
|
||||
tf.config.experimental.set_virtual_device_configuration(
|
||||
gpu,
|
||||
[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)]
|
||||
)
|
||||
print("GPU memory limit set")
|
||||
except RuntimeError as e:
|
||||
print(f"GPU memory limit setting failed: {e}")
|
||||
|
||||
def evaluate_model(self):
|
||||
"""Evaluate the trained model on test data."""
|
||||
if self.model is None:
|
||||
@ -509,16 +539,7 @@ class BitcoinPricePredictor:
|
||||
plt.show()
|
||||
|
||||
def analyze_market_trends(self, window_size=100, prominence=0.01, height=None, threshold=0.0, distance=None):
|
||||
"""
|
||||
Analyze market trends by finding local minima and maxima in the price data.
|
||||
|
||||
Args:
|
||||
window_size (int): Default distance between peaks if distance is not provided
|
||||
prominence (float): Minimum prominence of peaks (relative to price range)
|
||||
height (float): Minimum height of peaks (absolute value)
|
||||
threshold (float): Required threshold of peaks relative to neighbors
|
||||
distance (int): Minimum distance between peaks in number of data points
|
||||
"""
|
||||
"""Analyze market trends by finding local minima and maxima in the price data."""
|
||||
matplotlib.use('TkAgg') # Use TkAgg backend for interactive plotting
|
||||
|
||||
# Make sure data is loaded
|
||||
@ -527,7 +548,7 @@ class BitcoinPricePredictor:
|
||||
return
|
||||
|
||||
# Get the closing prices
|
||||
prices = self.df['Close'].values
|
||||
prices = self.df['close'].values
|
||||
|
||||
# Calculate prominence as a percentage of price range if provided as a relative value
|
||||
price_range = np.max(prices) - np.min(prices)
|
||||
@ -540,7 +561,7 @@ class BitcoinPricePredictor:
|
||||
if distance is None:
|
||||
distance = window_size
|
||||
|
||||
# Find local maxima (peaks) with adjustable parameters
|
||||
# Find local maxima (peaks)
|
||||
peaks, peaks_props = find_peaks(
|
||||
prices,
|
||||
height=height,
|
||||
@ -549,7 +570,7 @@ class BitcoinPricePredictor:
|
||||
prominence=prominence_abs
|
||||
)
|
||||
|
||||
# Find local minima (valleys) by inverting the signal
|
||||
# Find local minima (valleys)
|
||||
valleys, valleys_props = find_peaks(
|
||||
-prices,
|
||||
height=-height if height is not None else None,
|
||||
@ -558,7 +579,16 @@ class BitcoinPricePredictor:
|
||||
prominence=prominence_abs
|
||||
)
|
||||
|
||||
# Create a new figure for trend analysis
|
||||
# Create figure for trend analysis
|
||||
self._plot_trend_analysis(prices, peaks, valleys)
|
||||
|
||||
# Print trend statistics
|
||||
self._print_trend_statistics(prices, peaks, valleys)
|
||||
|
||||
return peaks, valleys
|
||||
|
||||
def _plot_trend_analysis(self, prices, peaks, valleys):
|
||||
"""Helper method to plot trend analysis."""
|
||||
plt.figure(figsize=(14, 7))
|
||||
|
||||
# Plot the price data
|
||||
@ -571,8 +601,8 @@ class BitcoinPricePredictor:
|
||||
# Identify trends by connecting consecutive extrema
|
||||
all_points = np.sort(np.concatenate([peaks, valleys]))
|
||||
|
||||
up_trends = []
|
||||
down_trends = []
|
||||
self.up_trends = []
|
||||
self.down_trends = []
|
||||
|
||||
for i in range(len(all_points) - 1):
|
||||
start_idx = all_points[i]
|
||||
@ -588,7 +618,7 @@ class BitcoinPricePredictor:
|
||||
duration = end_idx - start_idx
|
||||
magnitude = prices[end_idx] - prices[start_idx]
|
||||
percent_change = 100 * magnitude / prices[start_idx]
|
||||
up_trends.append((duration, magnitude, percent_change))
|
||||
self.up_trends.append((duration, magnitude, percent_change))
|
||||
|
||||
elif start_idx in peaks and end_idx in valleys:
|
||||
# Downtrend
|
||||
@ -599,33 +629,30 @@ class BitcoinPricePredictor:
|
||||
duration = end_idx - start_idx
|
||||
magnitude = prices[start_idx] - prices[end_idx]
|
||||
percent_change = 100 * magnitude / prices[start_idx]
|
||||
down_trends.append((duration, magnitude, percent_change))
|
||||
self.down_trends.append((duration, magnitude, percent_change))
|
||||
|
||||
plt.title(f'Bitcoin Price Trends Analysis\nParameters: prominence={prominence}, distance={distance}')
|
||||
plt.title('Bitcoin Price Trends Analysis')
|
||||
plt.xlabel('Date')
|
||||
plt.ylabel('Price')
|
||||
plt.legend()
|
||||
plt.grid(True)
|
||||
plt.tight_layout()
|
||||
plt.savefig('bitcoin_trends_analysis.png')
|
||||
plt.show(block=True)
|
||||
|
||||
# Print some statistics about the trends
|
||||
def _print_trend_statistics(self, prices, peaks, valleys):
|
||||
"""Helper method to print trend statistics."""
|
||||
print(f"Found {len(peaks)} local maxima and {len(valleys)} local minima")
|
||||
|
||||
# Calculate average trend durations and magnitudes
|
||||
if up_trends:
|
||||
avg_up_duration = sum(t[0] for t in up_trends) / len(up_trends)
|
||||
avg_up_magnitude = sum(t[1] for t in up_trends) / len(up_trends)
|
||||
avg_up_percent = sum(t[2] for t in up_trends) / len(up_trends)
|
||||
if hasattr(self, 'up_trends') and self.up_trends:
|
||||
avg_up_duration = sum(t[0] for t in self.up_trends) / len(self.up_trends)
|
||||
avg_up_magnitude = sum(t[1] for t in self.up_trends) / len(self.up_trends)
|
||||
avg_up_percent = sum(t[2] for t in self.up_trends) / len(self.up_trends)
|
||||
print(f"Average uptrend: {avg_up_duration:.1f} periods, {avg_up_magnitude:.2f} price change ({avg_up_percent:.2f}%)")
|
||||
|
||||
if down_trends:
|
||||
avg_down_duration = sum(t[0] for t in down_trends) / len(down_trends)
|
||||
avg_down_magnitude = sum(t[1] for t in down_trends) / len(down_trends)
|
||||
avg_down_percent = sum(t[2] for t in down_trends) / len(down_trends)
|
||||
if hasattr(self, 'down_trends') and self.down_trends:
|
||||
avg_down_duration = sum(t[0] for t in self.down_trends) / len(self.down_trends)
|
||||
avg_down_magnitude = sum(t[1] for t in self.down_trends) / len(self.down_trends)
|
||||
avg_down_percent = sum(t[2] for t in self.down_trends) / len(self.down_trends)
|
||||
print(f"Average downtrend: {avg_down_duration:.1f} periods, {avg_down_magnitude:.2f} price change ({avg_down_percent:.2f}%)")
|
||||
|
||||
# Show the plot interactively
|
||||
plt.show(block=True) # block=True ensures the plot window stays open
|
||||
|
||||
return peaks, valleys
|
||||
|
||||
@ -2,7 +2,8 @@ from BitcoinPricePredictor import BitcoinPricePredictor
|
||||
|
||||
if __name__ == "__main__":
|
||||
predictor = BitcoinPricePredictor(db_path='databases/bitcoin_historical_data.db', timeframe='H')
|
||||
predictor.load_data()
|
||||
predictor.load_data_csv('./data/btcusd_daily_data.csv')
|
||||
predictor.prepare_data()
|
||||
predictor.train_model()
|
||||
predictor.evaluate_model()
|
||||
predictor.plot_history()
|
||||
|
||||
2805
poetry.lock
generated
2805
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -1,2 +0,0 @@
|
||||
[virtualenvs]
|
||||
in-project = false
|
||||
@ -1,32 +0,0 @@
|
||||
[project]
|
||||
name = "cryptomarketparser"
|
||||
version = "0.1.0"
|
||||
description = ""
|
||||
authors = [
|
||||
{name = "Simon Moisy",email = "simon.moisy@tutanota.com"}
|
||||
]
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10,<4.0"
|
||||
dependencies = [
|
||||
"numpy (>=1.26.0,<2.2.0)",
|
||||
"pandas (>=2.2.3,<3.0.0)",
|
||||
"sqlalchemy (>=2.0.39,<3.0.0)",
|
||||
"scipy (>=1.15.2,<2.0.0)",
|
||||
"matplotlib (>=3.10.1,<4.0.0)",
|
||||
"scikit-learn (>=1.6.1,<2.0.0)",
|
||||
"ollama (>=0.4.7,<0.5.0)",
|
||||
"transformers (>=4.49.0,<5.0.0)",
|
||||
"markdownify (>=1.1.0,<2.0.0)",
|
||||
"ccxt (>=4.4.69,<5.0.0)",
|
||||
"mplfinance (>=0.12.10b0,<0.13.0)",
|
||||
]
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[[tool.poetry.source]]
|
||||
name = "pytorch"
|
||||
url = "https://download.pytorch.org/whl/cu121"
|
||||
priority = "explicit"
|
||||
BIN
requirements.txt
Normal file
BIN
requirements.txt
Normal file
Binary file not shown.
676
trend_detector_simple.py
Normal file
676
trend_detector_simple.py
Normal file
@ -0,0 +1,676 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import logging
|
||||
from scipy.signal import find_peaks
|
||||
from matplotlib.patches import Rectangle
|
||||
from scipy import stats
|
||||
from scipy import stats
|
||||
|
||||
# Color configuration
|
||||
# Plot colors
|
||||
DARK_BG_COLOR = '#181C27'
|
||||
LEGEND_BG_COLOR = '#333333'
|
||||
TITLE_COLOR = 'white'
|
||||
AXIS_LABEL_COLOR = 'white'
|
||||
|
||||
# Candlestick colors
|
||||
CANDLE_UP_COLOR = '#089981' # Green
|
||||
CANDLE_DOWN_COLOR = '#F23645' # Red
|
||||
|
||||
# Marker colors
|
||||
MIN_COLOR = 'red'
|
||||
MAX_COLOR = 'green'
|
||||
|
||||
# Line style colors
|
||||
MIN_LINE_STYLE = 'g--' # Green dashed
|
||||
MAX_LINE_STYLE = 'r--' # Red dashed
|
||||
SMA7_LINE_STYLE = 'y-' # Yellow solid
|
||||
SMA15_LINE_STYLE = 'm-' # Magenta solid
|
||||
|
||||
# SuperTrend colors
|
||||
ST_COLOR_UP = 'g-'
|
||||
ST_COLOR_DOWN = 'r-'
|
||||
|
||||
class TrendDetectorSimple:
|
||||
def __init__(self, data, verbose=False):
|
||||
"""
|
||||
Initialize the TrendDetectorSimple class.
|
||||
|
||||
Parameters:
|
||||
- data: pandas DataFrame containing price data
|
||||
- verbose: boolean, whether to display detailed logging information
|
||||
"""
|
||||
|
||||
self.data = data
|
||||
self.verbose = verbose
|
||||
|
||||
# Plot style configuration
|
||||
self.plot_style = 'dark_background'
|
||||
self.bg_color = DARK_BG_COLOR
|
||||
self.plot_size = (12, 8)
|
||||
|
||||
# Candlestick configuration
|
||||
self.candle_width = 0.6
|
||||
self.candle_up_color = CANDLE_UP_COLOR
|
||||
self.candle_down_color = CANDLE_DOWN_COLOR
|
||||
self.candle_alpha = 0.8
|
||||
self.wick_width = 1
|
||||
|
||||
# Marker configuration
|
||||
self.min_marker = '^'
|
||||
self.min_color = MIN_COLOR
|
||||
self.min_size = 100
|
||||
self.max_marker = 'v'
|
||||
self.max_color = MAX_COLOR
|
||||
self.max_size = 100
|
||||
self.marker_zorder = 100
|
||||
|
||||
# Line configuration
|
||||
self.line_width = 1
|
||||
self.min_line_style = MIN_LINE_STYLE
|
||||
self.max_line_style = MAX_LINE_STYLE
|
||||
self.sma7_line_style = SMA7_LINE_STYLE
|
||||
self.sma15_line_style = SMA15_LINE_STYLE
|
||||
|
||||
# Text configuration
|
||||
self.title_size = 14
|
||||
self.title_color = TITLE_COLOR
|
||||
self.axis_label_size = 12
|
||||
self.axis_label_color = AXIS_LABEL_COLOR
|
||||
|
||||
# Legend configuration
|
||||
self.legend_loc = 'best'
|
||||
self.legend_bg_color = LEGEND_BG_COLOR
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO if verbose else logging.WARNING,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
self.logger = logging.getLogger('TrendDetectorSimple')
|
||||
|
||||
# Convert data to pandas DataFrame if it's not already
|
||||
if not isinstance(self.data, pd.DataFrame):
|
||||
if isinstance(self.data, list):
|
||||
self.data = pd.DataFrame({'close': self.data})
|
||||
else:
|
||||
raise ValueError("Data must be a pandas DataFrame or a list")
|
||||
|
||||
self.logger.info(f"Initialized TrendDetectorSimple with {len(self.data)} data points")
|
||||
|
||||
def calculate_tr(self):
|
||||
"""
|
||||
Calculate True Range (TR) for the price data.
|
||||
|
||||
True Range is the greatest of:
|
||||
1. Current high - current low
|
||||
2. |Current high - previous close|
|
||||
3. |Current low - previous close|
|
||||
|
||||
Returns:
|
||||
- Numpy array of TR values
|
||||
"""
|
||||
df = self.data.copy()
|
||||
high = df['high'].values
|
||||
low = df['low'].values
|
||||
close = df['close'].values
|
||||
|
||||
tr = np.zeros_like(close)
|
||||
tr[0] = high[0] - low[0] # First TR is just the first day's range
|
||||
|
||||
for i in range(1, len(close)):
|
||||
# Current high - current low
|
||||
hl_range = high[i] - low[i]
|
||||
# |Current high - previous close|
|
||||
hc_range = abs(high[i] - close[i-1])
|
||||
# |Current low - previous close|
|
||||
lc_range = abs(low[i] - close[i-1])
|
||||
|
||||
# TR is the maximum of these three values
|
||||
tr[i] = max(hl_range, hc_range, lc_range)
|
||||
|
||||
return tr
|
||||
|
||||
def calculate_atr(self, period=14):
|
||||
"""
|
||||
Calculate Average True Range (ATR) for the price data.
|
||||
|
||||
ATR is the exponential moving average of the True Range over a specified period.
|
||||
|
||||
Parameters:
|
||||
- period: int, the period for the ATR calculation (default: 14)
|
||||
|
||||
Returns:
|
||||
- Numpy array of ATR values
|
||||
"""
|
||||
|
||||
tr = self.calculate_tr()
|
||||
atr = np.zeros_like(tr)
|
||||
|
||||
# First ATR value is just the first TR
|
||||
atr[0] = tr[0]
|
||||
|
||||
# Calculate exponential moving average (EMA) of TR
|
||||
multiplier = 2.0 / (period + 1)
|
||||
|
||||
for i in range(1, len(tr)):
|
||||
atr[i] = (tr[i] * multiplier) + (atr[i-1] * (1 - multiplier))
|
||||
|
||||
return atr
|
||||
|
||||
def detect_trends(self):
|
||||
"""
|
||||
Detect trends by identifying local minima and maxima in the price data
|
||||
using scipy.signal.find_peaks.
|
||||
|
||||
Parameters:
|
||||
- prominence: float, required prominence of peaks (relative to the price range)
|
||||
- width: int, required width of peaks in data points
|
||||
|
||||
Returns:
|
||||
- DataFrame with columns for timestamps, prices, and trend indicators
|
||||
- Dictionary containing analysis results including linear regression, SMAs, and SuperTrend indicators
|
||||
"""
|
||||
df = self.data.copy()
|
||||
close_prices = df['close'].values
|
||||
|
||||
# Find peaks in the price data
|
||||
max_peaks, _ = find_peaks(close_prices)
|
||||
min_peaks, _ = find_peaks(-close_prices)
|
||||
|
||||
# Create boolean columns for min and max peaks using vectorized operations
|
||||
df['is_max'] = False
|
||||
df['is_min'] = False
|
||||
df.iloc[max_peaks, df.columns.get_loc('is_max')] = True
|
||||
df.iloc[min_peaks, df.columns.get_loc('is_min')] = True
|
||||
|
||||
result = df[['datetime', 'close', 'is_min', 'is_max']].copy()
|
||||
|
||||
# Perform linear regression on min_peaks and max_peaks
|
||||
min_prices = df['close'].iloc[min_peaks].values
|
||||
max_prices = df['close'].iloc[max_peaks].values
|
||||
|
||||
# Linear regression for min peaks if we have at least 2 points
|
||||
min_slope, min_intercept, min_r_value, _, _ = stats.linregress(min_peaks, min_prices)
|
||||
# Linear regression for max peaks if we have at least 2 points
|
||||
max_slope, max_intercept, max_r_value, _, _ = stats.linregress(max_peaks, max_prices)
|
||||
|
||||
# Calculate Simple Moving Averages (SMA) for 7 and 15 periods
|
||||
sma_7 = pd.Series(close_prices).rolling(window=7, min_periods=1).mean().values
|
||||
sma_15 = pd.Series(close_prices).rolling(window=15, min_periods=1).mean().values
|
||||
|
||||
analysis_results = {}
|
||||
analysis_results['linear_regression'] = {
|
||||
'min': {
|
||||
'slope': min_slope,
|
||||
'intercept': min_intercept,
|
||||
'r_squared': min_r_value ** 2
|
||||
},
|
||||
'max': {
|
||||
'slope': max_slope,
|
||||
'intercept': max_intercept,
|
||||
'r_squared': max_r_value ** 2
|
||||
}
|
||||
}
|
||||
analysis_results['sma'] = {
|
||||
'7': sma_7,
|
||||
'15': sma_15
|
||||
}
|
||||
|
||||
# Calculate SuperTrend indicators
|
||||
supertrend_results_list = self._calculate_supertrend_indicators()
|
||||
meta_results = self.calculate_metasupertrend(df, supertrend_results_list)
|
||||
analysis_results['supertrend'] = supertrend_results_list
|
||||
analysis_results['metasupertrend'] = meta_results
|
||||
|
||||
return result, analysis_results
|
||||
|
||||
def _calculate_supertrend_indicators(self):
|
||||
"""
|
||||
Calculate SuperTrend indicators with different parameter sets.
|
||||
|
||||
Returns:
|
||||
- list, the SuperTrend results
|
||||
"""
|
||||
supertrend_params = [
|
||||
{"period": 12, "multiplier": 3.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN},
|
||||
{"period": 10, "multiplier": 1.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN},
|
||||
{"period": 11, "multiplier": 2.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN}
|
||||
]
|
||||
|
||||
supertrend_results_list = []
|
||||
for params in supertrend_params:
|
||||
supertrend_results = self.calculate_supertrend(
|
||||
period=params["period"],
|
||||
multiplier=params["multiplier"]
|
||||
)
|
||||
supertrend_results_list.append({
|
||||
"results": supertrend_results,
|
||||
"params": params
|
||||
})
|
||||
|
||||
return supertrend_results_list
|
||||
|
||||
def calculate_supertrend(self, period, multiplier):
|
||||
"""
|
||||
Calculate SuperTrend indicator for the price data.
|
||||
|
||||
SuperTrend is a trend-following indicator that uses ATR to determine the trend direction.
|
||||
|
||||
Parameters:
|
||||
- period: int, the period for the ATR calculation (default: 10)
|
||||
- multiplier: float, the multiplier for the ATR (default: 3.0)
|
||||
|
||||
Returns:
|
||||
- Dictionary containing SuperTrend values, trend direction, and upper/lower bands
|
||||
"""
|
||||
df = self.data.copy()
|
||||
high = df['high'].values
|
||||
low = df['low'].values
|
||||
close = df['close'].values
|
||||
|
||||
# Calculate ATR
|
||||
atr = self.calculate_atr(period)
|
||||
|
||||
# Calculate basic upper and lower bands
|
||||
upper_band = np.zeros_like(close)
|
||||
lower_band = np.zeros_like(close)
|
||||
|
||||
for i in range(len(close)):
|
||||
# Calculate the basic bands
|
||||
hl_avg = (high[i] + low[i]) / 2
|
||||
upper_band[i] = hl_avg + (multiplier * atr[i])
|
||||
lower_band[i] = hl_avg - (multiplier * atr[i])
|
||||
|
||||
# Calculate final upper and lower bands with trend logic
|
||||
final_upper = np.zeros_like(close)
|
||||
final_lower = np.zeros_like(close)
|
||||
supertrend = np.zeros_like(close)
|
||||
trend = np.zeros_like(close) # 1 for uptrend, -1 for downtrend
|
||||
|
||||
# Initialize first values
|
||||
final_upper[0] = upper_band[0]
|
||||
final_lower[0] = lower_band[0]
|
||||
|
||||
# If close price is above upper band, we're in a downtrend (ST = upper band)
|
||||
# If close price is below lower band, we're in an uptrend (ST = lower band)
|
||||
if close[0] <= upper_band[0]:
|
||||
supertrend[0] = upper_band[0]
|
||||
trend[0] = -1 # Downtrend
|
||||
else:
|
||||
supertrend[0] = lower_band[0]
|
||||
trend[0] = 1 # Uptrend
|
||||
|
||||
# Calculate SuperTrend for the rest of the data
|
||||
for i in range(1, len(close)):
|
||||
# Calculate final upper band
|
||||
if (upper_band[i] < final_upper[i-1]) or (close[i-1] > final_upper[i-1]):
|
||||
final_upper[i] = upper_band[i]
|
||||
else:
|
||||
final_upper[i] = final_upper[i-1]
|
||||
|
||||
# Calculate final lower band
|
||||
if (lower_band[i] > final_lower[i-1]) or (close[i-1] < final_lower[i-1]):
|
||||
final_lower[i] = lower_band[i]
|
||||
else:
|
||||
final_lower[i] = final_lower[i-1]
|
||||
|
||||
# Determine trend and SuperTrend value
|
||||
if supertrend[i-1] == final_upper[i-1] and close[i] <= final_upper[i]:
|
||||
# Continuing downtrend
|
||||
supertrend[i] = final_upper[i]
|
||||
trend[i] = -1
|
||||
elif supertrend[i-1] == final_upper[i-1] and close[i] > final_upper[i]:
|
||||
# Switching to uptrend
|
||||
supertrend[i] = final_lower[i]
|
||||
trend[i] = 1
|
||||
elif supertrend[i-1] == final_lower[i-1] and close[i] >= final_lower[i]:
|
||||
# Continuing uptrend
|
||||
supertrend[i] = final_lower[i]
|
||||
trend[i] = 1
|
||||
elif supertrend[i-1] == final_lower[i-1] and close[i] < final_lower[i]:
|
||||
# Switching to downtrend
|
||||
supertrend[i] = final_upper[i]
|
||||
trend[i] = -1
|
||||
|
||||
# Prepare result
|
||||
supertrend_results = {
|
||||
'supertrend': supertrend,
|
||||
'trend': trend,
|
||||
'upper_band': final_upper,
|
||||
'lower_band': final_lower
|
||||
}
|
||||
|
||||
return supertrend_results
|
||||
|
||||
def plot_trends(self, trend_data, analysis_results, view="both"):
|
||||
"""
|
||||
Plot the price data with detected trends using a candlestick chart.
|
||||
Also plots SuperTrend indicators with three different parameter sets.
|
||||
|
||||
Parameters:
|
||||
- trend_data: DataFrame, the output from detect_trends()
|
||||
- analysis_results: Dictionary containing analysis results from detect_trends()
|
||||
- view: str, one of 'both', 'trend', 'supertrend'; determines which plot(s) to display
|
||||
|
||||
Returns:
|
||||
- None (displays the plot)
|
||||
"""
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.patches import Rectangle
|
||||
|
||||
plt.style.use(self.plot_style)
|
||||
|
||||
if view == "both":
|
||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(self.plot_size[0]*2, self.plot_size[1]))
|
||||
else:
|
||||
fig, ax = plt.subplots(figsize=self.plot_size)
|
||||
ax1 = ax2 = None
|
||||
if view == "trend":
|
||||
ax1 = ax
|
||||
elif view == "supertrend":
|
||||
ax2 = ax
|
||||
|
||||
fig.patch.set_facecolor(self.bg_color)
|
||||
if ax1: ax1.set_facecolor(self.bg_color)
|
||||
if ax2: ax2.set_facecolor(self.bg_color)
|
||||
|
||||
df = self.data.copy()
|
||||
|
||||
if ax1:
|
||||
self._plot_trend_analysis(ax1, df, trend_data, analysis_results)
|
||||
|
||||
if ax2:
|
||||
self._plot_supertrend_analysis(ax2, df, analysis_results)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
||||
def _plot_candlesticks(self, ax, df):
|
||||
"""
|
||||
Plot candlesticks on the given axis.
|
||||
|
||||
Parameters:
|
||||
- ax: matplotlib.axes.Axes, the axis to plot on
|
||||
- df: pandas.DataFrame, the data to plot
|
||||
"""
|
||||
from matplotlib.patches import Rectangle
|
||||
|
||||
for i in range(len(df)):
|
||||
# Get OHLC values for this candle
|
||||
open_val = df['open'].iloc[i]
|
||||
close_val = df['close'].iloc[i]
|
||||
high_val = df['high'].iloc[i]
|
||||
low_val = df['low'].iloc[i]
|
||||
|
||||
# Determine candle color
|
||||
color = self.candle_up_color if close_val >= open_val else self.candle_down_color
|
||||
|
||||
# Plot candle body
|
||||
body_height = abs(close_val - open_val)
|
||||
bottom = min(open_val, close_val)
|
||||
rect = Rectangle((i - self.candle_width/2, bottom), self.candle_width, body_height,
|
||||
color=color, alpha=self.candle_alpha)
|
||||
ax.add_patch(rect)
|
||||
|
||||
# Plot candle wicks
|
||||
ax.plot([i, i], [low_val, high_val], color=color, linewidth=self.wick_width)
|
||||
|
||||
def _plot_trend_analysis(self, ax, df, trend_data, analysis_results):
|
||||
"""
|
||||
Plot trend analysis on the given axis.
|
||||
|
||||
Parameters:
|
||||
- ax: matplotlib.axes.Axes, the axis to plot on
|
||||
- df: pandas.DataFrame, the data to plot
|
||||
- trend_data: pandas.DataFrame, the trend data
|
||||
- analysis_results: dict, the analysis results
|
||||
"""
|
||||
# Draw candlesticks
|
||||
self._plot_candlesticks(ax, df)
|
||||
|
||||
# Plot minima and maxima points
|
||||
self._plot_min_max_points(ax, df, trend_data)
|
||||
|
||||
# Plot trend lines and moving averages
|
||||
if analysis_results:
|
||||
self._plot_trend_lines(ax, df, analysis_results)
|
||||
|
||||
# Configure the subplot
|
||||
self._configure_subplot(ax, 'Price Chart with Trend Analysis', len(df))
|
||||
|
||||
def _plot_min_max_points(self, ax, df, trend_data):
|
||||
"""
|
||||
Plot minimum and maximum points on the given axis.
|
||||
|
||||
Parameters:
|
||||
- ax: matplotlib.axes.Axes, the axis to plot on
|
||||
- df: pandas.DataFrame, the data to plot
|
||||
- trend_data: pandas.DataFrame, the trend data
|
||||
"""
|
||||
min_indices = trend_data.index[trend_data['is_min'] == True].tolist()
|
||||
if min_indices:
|
||||
min_y = [df['close'].iloc[i] for i in min_indices]
|
||||
ax.scatter(min_indices, min_y, color=self.min_color, s=self.min_size,
|
||||
marker=self.min_marker, label='Local Minima', zorder=self.marker_zorder)
|
||||
|
||||
max_indices = trend_data.index[trend_data['is_max'] == True].tolist()
|
||||
if max_indices:
|
||||
max_y = [df['close'].iloc[i] for i in max_indices]
|
||||
ax.scatter(max_indices, max_y, color=self.max_color, s=self.max_size,
|
||||
marker=self.max_marker, label='Local Maxima', zorder=self.marker_zorder)
|
||||
|
||||
def _plot_trend_lines(self, ax, df, analysis_results):
|
||||
"""
|
||||
Plot trend lines on the given axis.
|
||||
|
||||
Parameters:
|
||||
- ax: matplotlib.axes.Axes, the axis to plot on
|
||||
- df: pandas.DataFrame, the data to plot
|
||||
- analysis_results: dict, the analysis results
|
||||
"""
|
||||
x_vals = np.arange(len(df))
|
||||
|
||||
# Minima regression line (support)
|
||||
min_slope = analysis_results['linear_regression']['min']['slope']
|
||||
min_intercept = analysis_results['linear_regression']['min']['intercept']
|
||||
min_line = min_slope * x_vals + min_intercept
|
||||
ax.plot(x_vals, min_line, self.min_line_style, linewidth=self.line_width,
|
||||
label='Minima Regression')
|
||||
|
||||
# Maxima regression line (resistance)
|
||||
max_slope = analysis_results['linear_regression']['max']['slope']
|
||||
max_intercept = analysis_results['linear_regression']['max']['intercept']
|
||||
max_line = max_slope * x_vals + max_intercept
|
||||
ax.plot(x_vals, max_line, self.max_line_style, linewidth=self.line_width,
|
||||
label='Maxima Regression')
|
||||
|
||||
# SMA-7 line
|
||||
sma_7 = analysis_results['sma']['7']
|
||||
ax.plot(x_vals, sma_7, self.sma7_line_style, linewidth=self.line_width,
|
||||
label='SMA-7')
|
||||
|
||||
# SMA-15 line
|
||||
sma_15 = analysis_results['sma']['15']
|
||||
valid_idx_15 = ~np.isnan(sma_15)
|
||||
ax.plot(x_vals[valid_idx_15], sma_15[valid_idx_15], self.sma15_line_style,
|
||||
linewidth=self.line_width, label='SMA-15')
|
||||
|
||||
def _configure_subplot(self, ax, title, data_length):
|
||||
"""
|
||||
Configure the subplot with title, labels, limits, and legend.
|
||||
|
||||
Parameters:
|
||||
- ax: matplotlib.axes.Axes, the axis to configure
|
||||
- title: str, the title of the subplot
|
||||
- data_length: int, the length of the data
|
||||
"""
|
||||
# Set title and labels
|
||||
ax.set_title(title, fontsize=self.title_size, color=self.title_color)
|
||||
ax.set_xlabel('Date', fontsize=self.axis_label_size, color=self.axis_label_color)
|
||||
ax.set_ylabel('Price', fontsize=self.axis_label_size, color=self.axis_label_color)
|
||||
|
||||
# Set appropriate x-axis limits
|
||||
ax.set_xlim(-0.5, data_length - 0.5)
|
||||
|
||||
# Add a legend
|
||||
ax.legend(loc=self.legend_loc, facecolor=self.legend_bg_color)
|
||||
|
||||
def _plot_supertrend_analysis(self, ax, df, analysis_results=None):
|
||||
"""
|
||||
Plot SuperTrend analysis on the given axis.
|
||||
|
||||
Parameters:
|
||||
- ax: matplotlib.axes.Axes, the axis to plot on
|
||||
- df: pandas.DataFrame, the data to plot
|
||||
- supertrend_results_list: list, the SuperTrend results (optional)
|
||||
"""
|
||||
self._plot_candlesticks(ax, df)
|
||||
self._plot_supertrend_lines(ax, df, analysis_results['supertrend'], style='Both')
|
||||
self._configure_subplot(ax, 'Multiple SuperTrend Indicators', len(df))
|
||||
|
||||
def _plot_supertrend_lines(self, ax, df, analysis_results, style="Horizontal"):
|
||||
"""
|
||||
Plot SuperTrend lines on the given axis.
|
||||
|
||||
Parameters:
|
||||
- ax: matplotlib.axes.Axes, the axis to plot on
|
||||
- df: pandas.DataFrame, the data to plot
|
||||
- supertrend_results_list: list, the SuperTrend results
|
||||
"""
|
||||
x_vals = np.arange(len(df))
|
||||
|
||||
if style == 'Horizontal' or style == 'Both':
|
||||
if len(analysis_results) != 3:
|
||||
raise ValueError("Expected exactly 3 SuperTrend results for meta calculation")
|
||||
|
||||
trends = [st["results"]["trend"] for st in analysis_results]
|
||||
|
||||
band_height = 0.02 * (df["high"].max() - df["low"].min())
|
||||
y_base = df["low"].min() - band_height * 1.5
|
||||
|
||||
prev_color = None
|
||||
for i in range(1, len(x_vals)):
|
||||
t_vals = [t[i] for t in trends]
|
||||
up_count = t_vals.count(1)
|
||||
down_count = t_vals.count(-1)
|
||||
|
||||
if down_count == 3:
|
||||
color = "red"
|
||||
elif down_count == 2 and up_count == 1:
|
||||
color = "orange"
|
||||
elif down_count == 1 and up_count == 2:
|
||||
color = "yellow"
|
||||
elif up_count == 3:
|
||||
color = "green"
|
||||
else:
|
||||
continue # skip if unknown or inconsistent values
|
||||
|
||||
ax.add_patch(Rectangle(
|
||||
(x_vals[i-1], y_base),
|
||||
1,
|
||||
band_height,
|
||||
color=color,
|
||||
linewidth=0,
|
||||
alpha=0.6
|
||||
))
|
||||
# Draw a vertical line at the change of color
|
||||
if prev_color and prev_color != color:
|
||||
ax.axvline(x_vals[i-1], color="grey", alpha=0.3, linewidth=1)
|
||||
prev_color = color
|
||||
|
||||
ax.set_ylim(bottom=y_base - band_height * 0.5)
|
||||
if style == 'Curves' or style == 'Both':
|
||||
for st in analysis_results:
|
||||
params = st["params"]
|
||||
results = st["results"]
|
||||
supertrend = results["supertrend"]
|
||||
trend = results["trend"]
|
||||
|
||||
# Plot SuperTrend line with color based on trend
|
||||
for i in range(1, len(x_vals)):
|
||||
if trend[i] == 1: # Uptrend
|
||||
ax.plot(x_vals[i-1:i+1], supertrend[i-1:i+1], params["color_up"], linewidth=self.line_width)
|
||||
else: # Downtrend
|
||||
ax.plot(x_vals[i-1:i+1], supertrend[i-1:i+1], params["color_down"], linewidth=self.line_width)
|
||||
self._plot_metasupertrend_lines(ax, df, analysis_results)
|
||||
self._add_supertrend_legend(ax, analysis_results)
|
||||
|
||||
def _plot_metasupertrend_lines(self, ax, df, analysis_results):
|
||||
"""
|
||||
Plot a Meta SuperTrend line where all individual SuperTrends agree on trend.
|
||||
|
||||
Parameters:
|
||||
- ax: matplotlib.axes.Axes, the axis to plot on
|
||||
- df: pandas.DataFrame, the data to plot
|
||||
- supertrend_results_list: list, each item contains SuperTrend 'results' and 'params'
|
||||
"""
|
||||
x_vals = np.arange(len(df))
|
||||
meta_results = self.calculate_metasupertrend(df, analysis_results)
|
||||
params = analysis_results[0]["params"] # Use first config for styling
|
||||
|
||||
for i in meta_results['consensus_points']:
|
||||
if i > 0: # Skip first point as we need a previous point to draw a line
|
||||
if i-1 in meta_results['consensus_points']: # Only draw if previous point was also a consensus
|
||||
meta_trend = meta_results['meta_trends'][i]
|
||||
color = params["color_up"] if meta_trend == 1 else params["color_down"]
|
||||
ax.plot(x_vals[i-1:i+1],
|
||||
[meta_results['meta_values'][i-1], meta_results['meta_values'][i]],
|
||||
color, linewidth=self.line_width)
|
||||
|
||||
def _add_supertrend_legend(self, ax, supertrend_results_list):
|
||||
"""
|
||||
Add SuperTrend legend entries to the given axis.
|
||||
|
||||
Parameters:
|
||||
- ax: matplotlib.axes.Axes, the axis to add legend entries to
|
||||
- supertrend_results_list: list, the SuperTrend results
|
||||
"""
|
||||
for st in supertrend_results_list:
|
||||
params = st["params"]
|
||||
period = params["period"]
|
||||
multiplier = params["multiplier"]
|
||||
color_up = params["color_up"]
|
||||
color_down = params["color_down"]
|
||||
|
||||
ax.plot([], [], color_up, linewidth=self.line_width,
|
||||
label=f'ST (P:{period}, M:{multiplier}) Up')
|
||||
ax.plot([], [], color_down, linewidth=self.line_width,
|
||||
label=f'ST (P:{period}, M:{multiplier}) Down')
|
||||
|
||||
def calculate_metasupertrend(self, df, supertrend_results_list):
|
||||
"""
|
||||
Calculate Meta SuperTrend values where all individual SuperTrends agree on trend.
|
||||
|
||||
Parameters:
|
||||
- df: pandas.DataFrame, the data containing price information
|
||||
- supertrend_results_list: list, each item contains SuperTrend 'results' and 'params'
|
||||
|
||||
Returns:
|
||||
- dict containing:
|
||||
- meta_trends: list of trend values (1 for uptrend, -1 for downtrend, 0 for no consensus)
|
||||
- meta_values: list of averaged supertrend values where trends agree
|
||||
- consensus_points: list of indices where all trends agree
|
||||
"""
|
||||
if len(supertrend_results_list) != 3:
|
||||
raise ValueError("Expected exactly 3 SuperTrend results for meta calculation")
|
||||
|
||||
trends = [st["results"]["trend"] for st in supertrend_results_list]
|
||||
supertrends = [st["results"]["supertrend"] for st in supertrend_results_list]
|
||||
|
||||
data_length = len(df)
|
||||
meta_trends = np.zeros(data_length) # 0 means no consensus
|
||||
meta_values = np.zeros(data_length)
|
||||
consensus_points = []
|
||||
|
||||
for i in range(1, data_length):
|
||||
t1, t2, t3 = trends[0][i], trends[1][i], trends[2][i]
|
||||
if t1 == t2 == t3:
|
||||
meta_trends[i] = t1
|
||||
meta_values[i] = np.mean([s[i] for s in supertrends])
|
||||
consensus_points.append(i)
|
||||
|
||||
return {
|
||||
'meta_trends': meta_trends,
|
||||
'meta_values': meta_values,
|
||||
'consensus_points': consensus_points
|
||||
}
|
||||
|
||||
48
utils/converty_csv_to_day.py
Normal file
48
utils/converty_csv_to_day.py
Normal file
@ -0,0 +1,48 @@
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
def aggregate_to_daily(input_csv, output_csv):
|
||||
"""
|
||||
Load a CSV file with Bitcoin price data, aggregate to daily values, and save to a new CSV.
|
||||
|
||||
Args:
|
||||
input_csv (str): Path to the input CSV file
|
||||
output_csv (str): Path to save the output CSV file
|
||||
"""
|
||||
try:
|
||||
# Read the CSV file
|
||||
print(f"Loading data from {input_csv}...")
|
||||
df = pd.read_csv(input_csv)
|
||||
|
||||
# Convert timestamp to datetime
|
||||
df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')
|
||||
df.set_index('Timestamp', inplace=True)
|
||||
|
||||
# Aggregate to daily values
|
||||
print("Aggregating data to daily values...")
|
||||
daily_df = df.resample('D').agg({
|
||||
'Open': 'first',
|
||||
'High': 'max',
|
||||
'Low': 'min',
|
||||
'Close': 'last',
|
||||
'Volume': 'sum'
|
||||
})
|
||||
|
||||
# Reset index to make Timestamp a column
|
||||
daily_df.reset_index(inplace=True)
|
||||
|
||||
# Save to new CSV
|
||||
print(f"Saving daily data to {output_csv}...")
|
||||
daily_df.to_csv(output_csv, index=False)
|
||||
|
||||
print(f"Successfully processed {len(df)} records into {len(daily_df)} daily records")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing data: {str(e)}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Example usage
|
||||
input_file = "../data/btcusd_1-min_data.csv" # Update this path to your input file
|
||||
output_file = "../data/btcusd_daily_data.csv" # Update this path to your desired output file
|
||||
|
||||
aggregate_to_daily(input_file, output_file)
|
||||
Loading…
x
Reference in New Issue
Block a user