Compare commits

...

17 Commits

Author SHA1 Message Date
Simon Moisy
261ce54f37 adding metasupertrend data WIP 2025-05-13 16:13:52 +08:00
Simon Moisy
fcc0342fd8 Add whale activity plotting and alert processing to ChartView
- Introduced a new subplot for whale activity in ChartView (WIP)
- Implemented alert queue processing to handle whale alerts.
- Updated WhalesWatcher to support alert queue integration.
2025-03-25 17:00:53 +08:00
Simon Moisy
389cd7c919 Update log file path to 'logs/whales_watcher.log' 2025-03-25 08:20:11 +08:00
Simon Moisy
89c4ba21fa Update .gitignore to exclude additional files and directories including docker_run.sh, finBERT, launch.json, and binary files. 2025-03-25 08:18:52 +08:00
Simon Moisy
e3a09e406d new chartview class, updated db locations 2025-03-25 08:16:13 +08:00
Simon Moisy
8dfe81ab61 new chartview class, updated db locations 2025-03-25 08:15:27 +08:00
Simon Moisy
19f1aa36f1 writing results in db and file, ajusted with no smoothing window by default 2025-03-24 16:31:50 +08:00
Simon Moisy
876f1f37a1 to cuda 2025-03-22 08:15:33 +08:00
Simon Moisy
679f1bd941 chunked content to 512 2025-03-22 08:00:58 +08:00
Jericho
3864d7e93c test calls to finbert working - need to chunk at 512 2025-03-22 06:25:14 +08:00
Simon Moisy
b4ef1ad8a2 updated model call 2025-03-22 05:13:12 +08:00
Simon Moisy
c156898316 added subpath 2025-03-22 05:03:54 +08:00
Simon Moisy
247c59b800 fix on finBERT call 2025-03-22 04:59:01 +08:00
Simon Moisy
8784e29cb2 added finbert as submodule 2025-03-22 04:55:30 +08:00
Simon Moisy
268c09be9e updated finbert calls 2025-03-22 04:52:17 +08:00
Simon Moisy
885f51b83d cleanup 2025-03-22 04:47:52 +08:00
Simon Moisy
4f665db568 .gitignore 2025-03-22 04:46:59 +08:00
21 changed files with 1619 additions and 2160 deletions

9
.gitignore vendored
View File

@@ -2,9 +2,14 @@
# Byte-compiled / optimized / DLL files
.idea/*
docker_run.sh
finBERT
launch.json
*.bin
data/
.zip
.db
*.zip
*.db
plots/
results/

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "finBERT"]
path = finBERT
url = https://github.com/ProsusAI/finBERT

View File

@@ -19,10 +19,10 @@ from scipy.signal import find_peaks
from matplotlib.backends.backend_agg import FigureCanvasAgg
from matplotlib.figure import Figure
import matplotlib
from trend_detector_simple import TrendDetectorSimple
class BitcoinPricePredictor:
def __init__(self, db_path, timeframe, model=None, timesteps=10, batch_size=8, learning_rate=0.001, epochs=50):
def __init__(self, db_path, timeframe, model=None, timesteps=10, batch_size=32, learning_rate=0.001, epochs=50):
self.db_path = db_path
self.engine = create_engine(f'sqlite:///{self.db_path}')
self.timesteps = timesteps
@@ -37,8 +37,9 @@ class BitcoinPricePredictor:
self.history = None
self.scaler = None
self.timeframe = timeframe
self.feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume',
'HL_Ratio', 'SMA_7', 'SMA_21', 'Price_Change']
self.feature_columns = ['open', 'high', 'low', 'close', 'volume',
'hl_ratio', 'sma_7', 'sma_21', 'price_change']
self.df = None
@staticmethod
def reduce_mem_usage(df):
@@ -73,54 +74,73 @@ class BitcoinPricePredictor:
return df
def add_essential_features(self, df):
"""Add technical indicators and features to the dataframe."""
print("Adding technical indicators and features...")
df = df.copy()
# Price ratio features
df['HL_Ratio'] = (df['High'] / df['Low']).clip(lower=0.8, upper=1.2)
df['hl_ratio'] = (df['high'] / df['low']).clip(lower=0.8, upper=1.2)
# Moving averages with different timeframes
df['SMA_7'] = df['Close'].rolling(window=7, min_periods=1).mean()
df['SMA_21'] = df['Close'].rolling(window=21, min_periods=1).mean()
df['SMA_50'] = df['Close'].rolling(window=50, min_periods=1).mean()
# Moving averages
df['sma_7'] = df['close'].rolling(window=7, min_periods=1).mean()
df['sma_21'] = df['close'].rolling(window=21, min_periods=1).mean()
df['sma_50'] = df['close'].rolling(window=50, min_periods=1).mean()
# Exponential moving averages
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['ema_12'] = df['close'].ewm(span=12, adjust=False).mean()
df['ema_26'] = df['close'].ewm(span=26, adjust=False).mean()
# MACD
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['MACD_Signal'] = df['MACD'].ewm(span=9, adjust=False).mean()
df['macd'] = df['ema_12'] - df['ema_26']
df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
# Relative Strength Index (RSI)
delta = df['Close'].diff()
# RSI
delta = df['close'].diff()
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
rs = gain / loss
df['RSI'] = 100 - (100 / (1 + rs))
df['rsi'] = 100 - (100 / (1 + rs))
# Bollinger Bands
df['BB_Middle'] = df['Close'].rolling(window=20).mean()
df['BB_Std'] = df['Close'].rolling(window=20).std()
df['BB_Upper'] = df['BB_Middle'] + 2 * df['BB_Std']
df['BB_Lower'] = df['BB_Middle'] - 2 * df['BB_Std']
df['BB_Width'] = (df['BB_Upper'] - df['BB_Lower']) / df['BB_Middle']
df['bb_middle'] = df['close'].rolling(window=20).mean()
df['bb_std'] = df['close'].rolling(window=20).std()
df['bb_upper'] = df['bb_middle'] + 2 * df['bb_std']
df['bb_lower'] = df['bb_middle'] - 2 * df['bb_std']
df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / df['bb_middle']
# Price changes at different timeframes
df['Price_Change_1d'] = df['Close'].pct_change(periods=1).clip(lower=-0.5, upper=0.5)
df['Price_Change_3d'] = df['Close'].pct_change(periods=3).clip(lower=-0.5, upper=0.5)
df['Price_Change_7d'] = df['Close'].pct_change(periods=7).clip(lower=-0.5, upper=0.5)
# Price changes
df['price_change_1d'] = df['close'].pct_change(periods=1).clip(lower=-0.5, upper=0.5)
df['price_change_3d'] = df['close'].pct_change(periods=3).clip(lower=-0.5, upper=0.5)
df['price_change_7d'] = df['close'].pct_change(periods=7).clip(lower=-0.5, upper=0.5)
# Volatility
df['Volatility'] = df['Close'].rolling(window=14).std() / df['Close'].rolling(window=14).mean()
df['volatility'] = df['close'].rolling(window=14).std() / df['close'].rolling(window=14).mean()
# Clean up any NaN or infinite values
# Get trend indicators from TrendDetector
# TrendDetector already expects lowercase columns, so no need for conversion
if 'datetime' not in df.columns:
df['datetime'] = df.index
trend_detector = TrendDetectorSimple(df)
trend_data, trend_analysis = trend_detector.detect_trends()
# Add supertrend signals
for i, st in enumerate(trend_analysis['supertrend']):
df[f'supertrend_{i+1}'] = st['results']['trend']
# Add meta-supertrend consensus
meta_results = trend_detector.calculate_metasupertrend(df, trend_analysis['supertrend'])
df['metasupertrend'] = meta_results['meta_trends']
# Add SMA crossover signals
df['sma_cross'] = np.where(trend_analysis['sma']['7'] > trend_analysis['sma']['15'], 1, -1)
# Clean up NaN or infinite values
df = df.fillna(0)
df = df.replace([np.inf, -np.inf], 0)
# Update feature columns list
self.feature_columns = [col for col in df.columns if col not in ['Next_Period_Return', 'Next_Period_Up']]
# Update feature columns list - exclude non-numeric columns
self.feature_columns = [col for col in df.columns if col not in ['next_period_return', 'next_period_up', 'datetime']]
print(f"Shape after adding features: {df.shape}")
return df
@@ -135,18 +155,15 @@ class BitcoinPricePredictor:
def create_sequences_for_prediction(self, data):
"""Create sequences of data for prediction without targets."""
x = []
for i in range(len(data) - self.timesteps):
x.append(data[i:i + self.timesteps])
return np.array(x, dtype=np.float32)
return np.array([data[i:i + self.timesteps] for i in range(len(data) - self.timesteps)], dtype=np.float32)
def create_model(self, input_shape):
"""Create and compile the LSTM model architecture."""
model = Sequential([
LSTM(64, return_sequences=True, input_shape=input_shape,
recurrent_dropout=0.2, kernel_regularizer=tf.keras.regularizers.l1_l2(l1=1e-5, l2=1e-4)),
recurrent_dropout=0.2, kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
Dropout(0.3),
LSTM(32, return_sequences=True, recurrent_dropout=0.1, kernel_regularizer=tf.keras.regularizers.l1_l2(l1=1e-5, l2=1e-4)),
LSTM(32, return_sequences=True, recurrent_dropout=0.1, kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
Dropout(0.2),
LSTM(16),
Dropout(0.2),
@@ -162,39 +179,68 @@ class BitcoinPricePredictor:
print(model.summary())
return model
def load_data(self):
import pandas as pd
import sqlite3
def load_data_csv(self, file_path):
"""Load Bitcoin price data from a CSV file."""
try:
# Read the CSV file
self.df = pd.read_csv(file_path)
# Convert column names to lowercase
self.df.columns = self.df.columns.str.lower()
# Convert timestamp to datetime
self.df['timestamp'] = pd.to_datetime(self.df['timestamp'])
self.df.set_index('timestamp', inplace=True)
if self.df is not None and not self.df.empty:
print(f"Data loaded successfully from CSV. Shape: {self.df.shape}")
else:
print("Failed to load data. DataFrame is empty or None.")
except Exception as e:
print(f"Error loading CSV data: {str(e)}")
self.df = None
conn = sqlite3.connect(self.db_path)
self.df = pd.read_sql_query("SELECT * FROM bitcoin_data", conn)
if self.df is not None and not self.df.empty:
print(f"Data loaded successfully. Shape: {self.df.shape}")
else:
print("Failed to load data. DataFrame is empty or None.")
conn.close()
def load_data(self):
"""Load data from SQLite database."""
try:
import sqlite3
conn = sqlite3.connect(self.db_path)
self.df = pd.read_sql_query("SELECT * FROM bitcoin_data", conn)
# Convert column names to lowercase
self.df.columns = self.df.columns.str.lower()
if self.df is not None and not self.df.empty:
print(f"Data loaded successfully. Shape: {self.df.shape}")
else:
print("Failed to load data. DataFrame is empty or None.")
conn.close()
except Exception as e:
print(f"Error loading database data: {str(e)}")
self.df = None
def prepare_data(self):
"""Prepare data for model training."""
start_time = time.time()
#df = self.resample_data(df)
self.df = self.add_essential_features(self.df)
# Define target variable - binary classification for price movement
self.df['Next_Period_Return'] = self.df['Close'].pct_change(periods=1).shift(-1).clip(lower=-0.5, upper=0.5)
self.df['Next_Period_Up'] = (self.df['Next_Period_Return'] > 0).astype(np.int8)
self.df['next_period_return'] = self.df['close'].pct_change(periods=1).shift(-1).clip(lower=-0.5, upper=0.5)
self.df['next_period_up'] = (self.df['next_period_return'] > 0).astype(np.int8)
self.df = self.df.dropna()
# Scale features
self.scaler = RobustScaler()
self.df[self.feature_columns] = self.scaler.fit_transform(self.df[self.feature_columns])
# Ensure we're only scaling numeric features
numeric_features = [col for col in self.feature_columns if col != 'datetime' and pd.api.types.is_numeric_dtype(self.df[col])]
self.df[numeric_features] = self.scaler.fit_transform(self.df[numeric_features])
# Create sequences for LSTM
x, y = self.create_sequences(self.df[self.feature_columns].values, self.df['Next_Period_Up'].values)
x, y = self.create_sequences(self.df[numeric_features].values, self.df['next_period_up'].values)
print(f"Sequence shape: {x.shape}, Target shape: {y.shape}")
# Class balance check
@@ -204,31 +250,21 @@ class BitcoinPricePredictor:
# Train-test split (chronological)
split_idx = int(len(x) * 0.8)
x_train, x_test = x[:split_idx], x[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]
# Free memory
# del self.df
# gc.collect()
self.X_train, self.X_test = x_train, x_test
self.y_train, self.y_test = y_train, y_test
self.X_train, self.X_test = x[:split_idx], x[split_idx:]
self.y_train, self.y_test = y[:split_idx], y[split_idx:]
print(f"Training data shape: {self.X_train.shape}, Test data shape: {self.X_test.shape}")
class_counts = np.bincount(self.y_train.astype(int))
print(f"Class distribution in training data: 0={class_counts[0]}, 1={class_counts[1]}")
print(f"Data preparation completed in {time.time() - start_time:.2f} seconds")
def resample_data(self, df):
"""Resample data to specified timeframe."""
print(f"Resampling data to {self.timeframe} timeframe...")
df = df.resample(self.timeframe).agg({
'Open': 'first',
'High': 'max',
'Low': 'min',
'Close': 'last',
'Volume': 'sum'
'open': 'first',
'high': 'max',
'low': 'min',
'close': 'last',
'volume': 'sum'
})
print(f"Shape after resampling: {df.shape}")
return df
@@ -236,10 +272,14 @@ class BitcoinPricePredictor:
def load_new_data_from_model(self):
"""Load new data and identify missing entries compared to the database."""
new_data = pd.read_csv("./data/btcusd_1-min_data.csv")
new_data['Timestamp'] = pd.to_datetime(new_data['Timestamp'], unit='s')
# Convert column names to lowercase
new_data.columns = new_data.columns.str.lower()
new_data['timestamp'] = pd.to_datetime(new_data['timestamp'], unit='s')
existing_data = pd.read_sql('SELECT * FROM bitcoin_data', self.engine, index_col='Timestamp',
parse_dates=['Timestamp'])
existing_data = pd.read_sql('SELECT * FROM bitcoin_data', self.engine, index_col='timestamp',
parse_dates=['timestamp'])
# Convert column names to lowercase
existing_data.columns = existing_data.columns.str.lower()
# Show the most recent entries in the database
last_entries = existing_data.sort_index(ascending=False).head(10)
@@ -248,7 +288,7 @@ class BitcoinPricePredictor:
# Find missing data
latest_timestamp = existing_data.index.max()
missing_data = new_data[new_data['Timestamp'] > latest_timestamp]
missing_data = new_data[new_data['timestamp'] > latest_timestamp]
print(f"New data total length: {len(new_data)}")
print(f"Missing data entries: {len(missing_data)}")
@@ -271,36 +311,36 @@ class BitcoinPricePredictor:
return data
def make_predictions_w_reality(self, new_data):
"""Make predictions and compare with actual outcomes."""
# Ensure the 'Timestamp' column is present
if 'Timestamp' not in new_data.columns:
raise ValueError("Input data must contain a 'Timestamp' column.")
def _prepare_prediction_data(self, new_data):
"""Helper method to prepare data for prediction."""
# Ensure the 'timestamp' column is present
if 'timestamp' not in new_data.columns:
raise ValueError("Input data must contain a 'timestamp' column.")
# Convert 'Timestamp' to datetime and set as index
new_data['Timestamp'] = pd.to_datetime(new_data['Timestamp'], errors='coerce')
new_data = new_data.dropna(subset=['Timestamp']) # Drop rows where Timestamp is NaT
new_data.set_index('Timestamp', inplace=True)
# Convert 'timestamp' to datetime and set as index
new_data['timestamp'] = pd.to_datetime(new_data['timestamp'], errors='coerce')
new_data = new_data.dropna(subset=['timestamp']) # Drop rows where Timestamp is NaT
new_data.set_index('timestamp', inplace=True)
# Resample and aggregate data to the specified timeframe
grouped_data = new_data.resample(self.timeframe).agg({
'Open': 'first',
'High': 'max',
'Low': 'min',
'Close': 'last',
'Volume': 'sum'
}).reset_index() # Reset index to preserve 'Timestamp' as a column
'open': 'first',
'high': 'max',
'low': 'min',
'close': 'last',
'volume': 'sum'
}).reset_index() # Reset index to preserve 'timestamp' as a column
if grouped_data.empty:
print("No new data found.")
return None, None
return None
# Preprocess the data
grouped_data = self.preprocess_data(grouped_data)
if grouped_data.empty:
print("No new data after preprocessing.")
return None, None
return None
# Create sequences for the model
X = self.create_sequences_for_prediction(grouped_data[self.feature_columns].values)
@@ -308,6 +348,19 @@ class BitcoinPricePredictor:
if len(X) == 0:
print("Not enough data to create sequences.")
return None, None
return X, grouped_data
def make_predictions_w_reality(self, new_data):
"""Make predictions and compare with actual outcomes."""
# Convert column names to lowercase if needed
new_data.columns = new_data.columns.str.lower()
prepared_data = self._prepare_prediction_data(new_data)
if prepared_data is None:
return None, None
X, grouped_data = prepared_data
# Generate predictions
predictions = self.model.predict(X)
@@ -316,57 +369,30 @@ class BitcoinPricePredictor:
grouped_data = grouped_data.iloc[self.timesteps:] # Align with sequence length
# Add predictions to the grouped_data DataFrame
grouped_data['Predictions'] = (predictions > 0.5).astype(int)
grouped_data['Prediction_Probability'] = predictions
grouped_data['predictions'] = (predictions > 0.5).astype(int)
grouped_data['prediction_probability'] = predictions
# Calculate reality (actual price movement)
grouped_data['Reality'] = (grouped_data['Close'].pct_change() > 0.005).astype(int)
grouped_data['reality'] = (grouped_data['close'].pct_change() > 0.005).astype(int)
# Calculate accuracy
grouped_data['Correct'] = (grouped_data['Predictions'] == grouped_data['Reality']).astype(int)
accuracy = grouped_data['Correct'].mean()
grouped_data['correct'] = (grouped_data['predictions'] == grouped_data['reality']).astype(int)
accuracy = grouped_data['correct'].mean()
print(f"Prediction accuracy: {accuracy:.2f}")
# Return predictions and reality
return grouped_data[['Timestamp', 'Predictions', 'Prediction_Probability']], grouped_data['Reality']
return grouped_data[['timestamp', 'predictions', 'prediction_probability']], grouped_data['reality']
def make_predictions(self, new_data):
"""Make predictions on new data."""
# Ensure the 'Timestamp' column is present
if 'Timestamp' not in new_data.columns:
raise ValueError("Input data must contain a 'Timestamp' column.")
# Convert 'Timestamp' to datetime and set as index
new_data['Timestamp'] = pd.to_datetime(new_data['Timestamp'], errors='coerce')
new_data = new_data.dropna(subset=['Timestamp']) # Drop rows where Timestamp is NaT
new_data.set_index('Timestamp', inplace=True)
# Resample and aggregate data to the specified timeframe
grouped_data = new_data.resample(self.timeframe).agg({
'Open': 'first',
'High': 'max',
'Low': 'min',
'Close': 'last',
'Volume': 'sum'
}).reset_index() # Reset index to preserve 'Timestamp' as a column
if grouped_data.empty:
print("No new data found.")
return None
# Preprocess the data
grouped_data = self.preprocess_data(grouped_data)
if grouped_data.empty:
print("No new data after preprocessing.")
return None
# Create sequences for the model
X = self.create_sequences_for_prediction(grouped_data[self.feature_columns].values)
# Convert column names to lowercase if needed
new_data.columns = new_data.columns.str.lower()
if len(X) == 0:
print("Not enough data to create sequences.")
prepared_data = self._prepare_prediction_data(new_data)
if prepared_data is None:
return None
X, grouped_data = prepared_data
# Generate predictions
predictions = self.model.predict(X)
@@ -375,11 +401,11 @@ class BitcoinPricePredictor:
grouped_data = grouped_data.iloc[self.timesteps:]
# Add predictions to the grouped_data DataFrame
grouped_data['Predictions'] = (predictions > 0.5).astype(int)
grouped_data['Prediction_Probability'] = predictions.flatten()
grouped_data['predictions'] = (predictions > 0.5).astype(int)
grouped_data['prediction_probability'] = predictions.flatten()
# Return prediction results
return grouped_data[['Timestamp', 'Predictions', 'Prediction_Probability']]
return grouped_data[['timestamp', 'predictions', 'prediction_probability']]
def update_database(self, missing_data):
"""Update the database with the missing data."""
@@ -393,24 +419,13 @@ class BitcoinPricePredictor:
def train_model(self):
"""Train the LSTM model with early stopping and checkpointing."""
if self.X_train is None or self.y_train is None:
raise ValueError("Data not loaded. Call load_and_prepare_data() first.")
raise ValueError("Data not loaded. Call prepare_data() first.")
# Create model directory if it doesn't exist
os.makedirs("./models", exist_ok=True)
# Configure TensorFlow to use memory growth
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
try:
# Limit TensorFlow to use only 80% of GPU memory
for gpu in gpus:
tf.config.experimental.set_virtual_device_configuration(
gpu,
[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)] # Set to 2GB or adjust as needed
)
print("GPU memory limit set")
except RuntimeError as e:
print(f"GPU memory limit setting failed: {e}")
# Configure TensorFlow for GPU memory
self._configure_gpu_memory()
# Create the model
self.model = self.create_model(input_shape=(self.timesteps, len(self.feature_columns)))
@@ -450,6 +465,21 @@ class BitcoinPricePredictor:
self.model.save(final_model_path)
print(f"Model saved to {final_model_path}")
def _configure_gpu_memory(self):
"""Configure TensorFlow to use GPU memory efficiently."""
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
try:
# Limit TensorFlow to use only 80% of GPU memory
for gpu in gpus:
tf.config.experimental.set_virtual_device_configuration(
gpu,
[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)]
)
print("GPU memory limit set")
except RuntimeError as e:
print(f"GPU memory limit setting failed: {e}")
def evaluate_model(self):
"""Evaluate the trained model on test data."""
if self.model is None:
@@ -509,16 +539,7 @@ class BitcoinPricePredictor:
plt.show()
def analyze_market_trends(self, window_size=100, prominence=0.01, height=None, threshold=0.0, distance=None):
"""
Analyze market trends by finding local minima and maxima in the price data.
Args:
window_size (int): Default distance between peaks if distance is not provided
prominence (float): Minimum prominence of peaks (relative to price range)
height (float): Minimum height of peaks (absolute value)
threshold (float): Required threshold of peaks relative to neighbors
distance (int): Minimum distance between peaks in number of data points
"""
"""Analyze market trends by finding local minima and maxima in the price data."""
matplotlib.use('TkAgg') # Use TkAgg backend for interactive plotting
# Make sure data is loaded
@@ -527,7 +548,7 @@ class BitcoinPricePredictor:
return
# Get the closing prices
prices = self.df['Close'].values
prices = self.df['close'].values
# Calculate prominence as a percentage of price range if provided as a relative value
price_range = np.max(prices) - np.min(prices)
@@ -540,7 +561,7 @@ class BitcoinPricePredictor:
if distance is None:
distance = window_size
# Find local maxima (peaks) with adjustable parameters
# Find local maxima (peaks)
peaks, peaks_props = find_peaks(
prices,
height=height,
@@ -549,7 +570,7 @@ class BitcoinPricePredictor:
prominence=prominence_abs
)
# Find local minima (valleys) by inverting the signal
# Find local minima (valleys)
valleys, valleys_props = find_peaks(
-prices,
height=-height if height is not None else None,
@@ -558,7 +579,16 @@ class BitcoinPricePredictor:
prominence=prominence_abs
)
# Create a new figure for trend analysis
# Create figure for trend analysis
self._plot_trend_analysis(prices, peaks, valleys)
# Print trend statistics
self._print_trend_statistics(prices, peaks, valleys)
return peaks, valleys
def _plot_trend_analysis(self, prices, peaks, valleys):
"""Helper method to plot trend analysis."""
plt.figure(figsize=(14, 7))
# Plot the price data
@@ -571,8 +601,8 @@ class BitcoinPricePredictor:
# Identify trends by connecting consecutive extrema
all_points = np.sort(np.concatenate([peaks, valleys]))
up_trends = []
down_trends = []
self.up_trends = []
self.down_trends = []
for i in range(len(all_points) - 1):
start_idx = all_points[i]
@@ -588,7 +618,7 @@ class BitcoinPricePredictor:
duration = end_idx - start_idx
magnitude = prices[end_idx] - prices[start_idx]
percent_change = 100 * magnitude / prices[start_idx]
up_trends.append((duration, magnitude, percent_change))
self.up_trends.append((duration, magnitude, percent_change))
elif start_idx in peaks and end_idx in valleys:
# Downtrend
@@ -599,33 +629,30 @@ class BitcoinPricePredictor:
duration = end_idx - start_idx
magnitude = prices[start_idx] - prices[end_idx]
percent_change = 100 * magnitude / prices[start_idx]
down_trends.append((duration, magnitude, percent_change))
self.down_trends.append((duration, magnitude, percent_change))
plt.title(f'Bitcoin Price Trends Analysis\nParameters: prominence={prominence}, distance={distance}')
plt.title('Bitcoin Price Trends Analysis')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig('bitcoin_trends_analysis.png')
plt.show(block=True)
# Print some statistics about the trends
def _print_trend_statistics(self, prices, peaks, valleys):
"""Helper method to print trend statistics."""
print(f"Found {len(peaks)} local maxima and {len(valleys)} local minima")
# Calculate average trend durations and magnitudes
if up_trends:
avg_up_duration = sum(t[0] for t in up_trends) / len(up_trends)
avg_up_magnitude = sum(t[1] for t in up_trends) / len(up_trends)
avg_up_percent = sum(t[2] for t in up_trends) / len(up_trends)
if hasattr(self, 'up_trends') and self.up_trends:
avg_up_duration = sum(t[0] for t in self.up_trends) / len(self.up_trends)
avg_up_magnitude = sum(t[1] for t in self.up_trends) / len(self.up_trends)
avg_up_percent = sum(t[2] for t in self.up_trends) / len(self.up_trends)
print(f"Average uptrend: {avg_up_duration:.1f} periods, {avg_up_magnitude:.2f} price change ({avg_up_percent:.2f}%)")
if down_trends:
avg_down_duration = sum(t[0] for t in down_trends) / len(down_trends)
avg_down_magnitude = sum(t[1] for t in down_trends) / len(down_trends)
avg_down_percent = sum(t[2] for t in down_trends) / len(down_trends)
if hasattr(self, 'down_trends') and self.down_trends:
avg_down_duration = sum(t[0] for t in self.down_trends) / len(self.down_trends)
avg_down_magnitude = sum(t[1] for t in self.down_trends) / len(self.down_trends)
avg_down_percent = sum(t[2] for t in self.down_trends) / len(self.down_trends)
print(f"Average downtrend: {avg_down_duration:.1f} periods, {avg_down_magnitude:.2f} price change ({avg_down_percent:.2f}%)")
# Show the plot interactively
plt.show(block=True) # block=True ensures the plot window stays open
return peaks, valleys

296
ChartView.py Normal file
View File

@@ -0,0 +1,296 @@
import mplfinance as mpf
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
import matplotlib.animation as animation
import ccxt
from matplotlib.dates import date2num
import threading
import tkinter as tk
from tkinter import ttk
import queue
from datetime import datetime
class ChartView:
def __init__(self, exchange_ids, symbol='BTC/USDT', timeframe='1m', limit=100, alert_queue=None):
self.exchange_ids = exchange_ids
self.symbol = symbol
self.timeframe = timeframe
self.limit = limit
self.running = False
self.fig = None
self.ax1 = None
self.ax2 = None
self.ax_whales = None # New axis for whale activity
self.animation = None
self.current_exchange_id = exchange_ids[0]
self.root = None
self.canvas = None
self.alert_queue = alert_queue
self.whale_markers = []
# Maintain a dict of whale alerts by exchange
self.whale_alerts = {exchange_id: [] for exchange_id in exchange_ids}
# Create exchanges dictionary to avoid reconnecting each time
self.exchanges = {}
for exchange_id in exchange_ids:
self.connect_to_exchange(exchange_id)
def connect_to_exchange(self, exchange_id):
if exchange_id in self.exchanges:
self.exchange = self.exchanges[exchange_id]
return
try:
exchange_class = getattr(ccxt, exchange_id)
exchange = exchange_class({
'enableRateLimit': True,
})
exchange.load_markets()
if self.symbol not in exchange.markets:
raise Exception(f"Symbol {self.symbol} not found on {exchange_id}")
self.exchanges[exchange_id] = exchange
self.exchange = exchange
except Exception as e:
raise Exception(f"Failed to connect to {exchange_id}: {str(e)}")
def fetch_ohlcv(self, exchange_id):
try:
exchange = self.exchanges[exchange_id]
ohlcv = exchange.fetch_ohlcv(self.symbol, self.timeframe, limit=self.limit)
df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
df.set_index('timestamp', inplace=True)
return df
except Exception as e:
print(f"Error fetching OHLCV data from {exchange_id}: {str(e)}")
return None
def update_chart(self):
if not self.running:
return
df = self.fetch_ohlcv(self.current_exchange_id)
if df is not None:
self.ax1.clear()
self.ax2.clear()
self.ax_whales.clear()
# Process any pending whale alerts from the queue
self.process_whale_queue()
# Plot the chart
mpf.plot(df, type='candle', ax=self.ax1, volume=self.ax2,
style='yahoo', xrotation=0, ylabel='Price',
ylabel_lower='Volume', show_nontrading=False)
latest_price = df['close'].iloc[-1]
self.ax1.set_title(f'{self.symbol} on {self.current_exchange_id} - Last: ${latest_price:.2f}')
# Plot whale activity on the dedicated subplot
self.plot_whale_activity(df)
# Refresh the canvas
self.canvas.draw()
def process_whale_queue(self):
"""Process any new whale alerts from the queue"""
if self.alert_queue is None:
return
# Process all available alerts
while not self.alert_queue.empty():
try:
alert = self.alert_queue.get_nowait()
exchange_id = alert['exchange']
# Add timestamp if not present
if 'timestamp' not in alert:
alert['timestamp'] = datetime.now().isoformat()
# Store the alert with the appropriate exchange
if exchange_id in self.whale_alerts:
self.whale_alerts[exchange_id].append(alert)
print(f"New whale alert for {exchange_id}: {alert['type']} ${alert['value_usd']:,.2f}")
self.alert_queue.task_done()
except queue.Empty:
break
except Exception as e:
print(f"Error processing whale alert from queue: {str(e)}")
if self.alert_queue is not None:
self.alert_queue.task_done()
def plot_whale_activity(self, df):
"""Plot whale activity on the dedicated whale subplot"""
if self.current_exchange_id not in self.whale_alerts:
self.ax_whales.set_ylabel('Whale Activity')
return
# Get the whale alerts for current exchange
exchange_alerts = self.whale_alerts[self.current_exchange_id]
if not exchange_alerts:
self.ax_whales.set_ylabel('Whale Activity')
return
try:
# Create a dataframe from the whale alerts
alerts_df = pd.DataFrame(exchange_alerts)
# Handle the timestamp conversion carefully
alerts_df['timestamp'] = pd.to_datetime(alerts_df['timestamp'])
# Check if the dataframe timestamps have timezone info
has_tz = False
if not alerts_df.empty:
has_tz = alerts_df['timestamp'].iloc[0].tzinfo is not None
# Get the start and end time of the current chart
start_time = df.index[0]
end_time = df.index[-1]
# Convert all timestamps to naive (remove timezone info) for comparison
# First create a copy of the timestamp column
alerts_df['plot_timestamp'] = alerts_df['timestamp']
# If timestamps have timezone, convert them to naive by replacing with their UTC equivalent
if has_tz:
alerts_df['plot_timestamp'] = alerts_df['timestamp'].dt.tz_localize(None)
else:
# If timestamps are naive, assume they're in local time and adjust by GMT+8 offset
alerts_df['plot_timestamp'] = alerts_df['timestamp'] - pd.Timedelta(hours=8)
# Filter to only include alerts in the visible time range
visible_alerts = alerts_df[
(alerts_df['plot_timestamp'] >= start_time) &
(alerts_df['plot_timestamp'] <= end_time)
]
if visible_alerts.empty:
self.ax_whales.set_ylabel('Whale Activity')
return
# Create two separate series for buy and sell orders
buy_orders = visible_alerts[visible_alerts['type'] == 'bid'].copy()
sell_orders = visible_alerts[visible_alerts['type'] == 'ask'].copy()
# Draw the buy orders as green bars going up
if not buy_orders.empty:
buy_values = buy_orders['value_usd'] / 1_000_000 # Convert to millions
self.ax_whales.bar(buy_orders['plot_timestamp'], buy_values,
color='green', alpha=0.6, width=pd.Timedelta(minutes=1))
# Draw the sell orders as red bars going down
if not sell_orders.empty:
sell_values = -1 * sell_orders['value_usd'] / 1_000_000 # Convert to millions and make negative
self.ax_whales.bar(sell_orders['plot_timestamp'], sell_values,
color='red', alpha=0.6, width=pd.Timedelta(minutes=1))
# Format the whale activity subplot
self.ax_whales.set_ylabel('Whale Activity ($M)')
# Add zero line
self.ax_whales.axhline(y=0, color='black', linestyle='-', alpha=0.3)
# Set y-axis limits with some padding
all_values = visible_alerts['value_usd'] / 1_000_000
if len(all_values) > 0:
max_val = all_values.max() * 1.1
self.ax_whales.set_ylim(-max_val, max_val)
# Align the x-axis with the price chart
self.ax_whales.sharex(self.ax1)
# Add text labels for significant whale activity
for idx, row in visible_alerts.iterrows():
value_millions = row['value_usd'] / 1_000_000
sign = 1 if row['type'] == 'bid' else -1
position = sign * value_millions
if abs(value_millions) > max(all_values) * 0.3: # Only label significant activity
self.ax_whales.text(
row['plot_timestamp'], position * 1.05,
f"${abs(value_millions):.1f}M",
ha='center', va='bottom' if sign > 0 else 'top',
fontsize=8, color='green' if sign > 0 else 'red'
)
except Exception as e:
print(f"Error plotting whale activity: {str(e)}")
import traceback
traceback.print_exc()
self.ax_whales.set_ylabel('Whale Activity - Error plotting data')
def parse_timeframe_to_minutes(self, timeframe):
"""Convert timeframe string to minutes"""
if timeframe.endswith('m'):
return int(timeframe[:-1])
elif timeframe.endswith('h'):
return int(timeframe[:-1]) * 60
elif timeframe.endswith('d'):
return int(timeframe[:-1]) * 1440
return 1 # default to 1 minute
def on_exchange_change(self, event=None):
selected_exchange = self.exchange_var.get()
if selected_exchange != self.current_exchange_id:
self.current_exchange_id = selected_exchange
self.update_chart()
def start_gui(self):
self.root = tk.Tk()
self.root.title("Crypto Chart Viewer")
self.root.geometry("1200x800")
# Create frame for controls
control_frame = ttk.Frame(self.root)
control_frame.pack(side=tk.TOP, fill=tk.X, padx=10, pady=5)
# Dropdown for selecting exchange
ttk.Label(control_frame, text="Exchange:").pack(side=tk.LEFT, padx=(0, 5))
self.exchange_var = tk.StringVar(value=self.current_exchange_id)
exchange_dropdown = ttk.Combobox(control_frame, textvariable=self.exchange_var, values=self.exchange_ids, width=15)
exchange_dropdown.pack(side=tk.LEFT, padx=(0, 10))
exchange_dropdown.bind("<<ComboboxSelected>>", self.on_exchange_change)
# Create the figure with three subplots
self.fig = plt.Figure(figsize=(12, 8), dpi=100)
# Adjust the subplot grid to add whale activity panel
# Price chart (60%), Volume (20%), Whale Activity (20%)
self.ax1 = self.fig.add_subplot(5, 1, (1, 3)) # Price chart - 3/5 of the space
self.ax2 = self.fig.add_subplot(5, 1, 4, sharex=self.ax1) # Volume - 1/5 of the space
self.ax_whales = self.fig.add_subplot(5, 1, 5, sharex=self.ax1) # Whale activity - 1/5 of the space
# Create the canvas to display the figure
self.canvas = FigureCanvasTkAgg(self.fig, master=self.root)
self.canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)
# Add toolbar
toolbar = NavigationToolbar2Tk(self.canvas, self.root)
toolbar.update()
self.canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)
# Initial chart update
self.running = True
self.update_chart()
# Set up periodic updates
def update():
if self.running:
self.update_chart()
self.root.after(10000, update) # Update every 10 seconds
self.root.after(10000, update)
# Handle window close
def on_closing():
self.running = False
self.root.destroy()
self.root.protocol("WM_DELETE_WINDOW", on_closing)
# Start the Tkinter event loop
self.root.mainloop()

226
WhalesWatcher.py Normal file
View File

@@ -0,0 +1,226 @@
import pandas as pd
import time
from datetime import datetime, timezone
import logging
import ccxt
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
Base = declarative_base()
class Alert(Base):
__tablename__ = 'alerts'
id = Column(Integer, primary_key=True)
exchange = Column(String)
order_type = Column(String)
amount = Column(Float)
price = Column(Float)
value_usd = Column(Float)
timestamp = Column(DateTime)
class WhalesWatcher:
def __init__(self, alert_queue=None):
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
filename='logs/whales_watcher.log',
filemode='a'
)
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
self.logger = logging.getLogger('WhalesWatcher')
self.logger.addHandler(console_handler)
self.exchanges = {}
self.symbol = 'BTC/USDT'
self.threshold = 100000
self.check_interval = 5
self.large_orders = []
self.is_running = False
self.alert_queue = alert_queue
self.logger.info("WhalesWatcher initialized with symbol: %s and threshold: $%d", self.symbol, self.threshold)
self.engine = create_engine('sqlite:///databases/alerts.db')
Base.metadata.create_all(self.engine)
self.Session = sessionmaker(bind=self.engine)
def connect_to_exchanges(self, exchange_ids, api_key=None, secret=None):
"""Connect to multiple cryptocurrency exchanges."""
connected_exchanges = {}
for exchange_id in exchange_ids:
self.logger.info("Attempting to connect to exchange: %s", exchange_id)
try:
exchange_class = getattr(ccxt, exchange_id)
exchange = exchange_class({
'apiKey': api_key,
'secret': secret,
'enableRateLimit': True,
})
exchange.load_markets()
connected_exchanges[exchange_id] = exchange
self.logger.info(f"Connected to {exchange_id} successfully")
except Exception as e:
self.logger.error(f"Failed to connect to {exchange_id}: {str(e)}")
return connected_exchanges
def fetch_order_book(self, exchange, symbol):
"""Fetch the current order book for the given symbol on the specified exchange."""
try:
if exchange.id == 'huobi':
order_book = exchange.fetch_order_book(symbol, limit=150)
else:
order_book = exchange.fetch_order_book(symbol, limit=100)
return order_book
except Exception as e:
self.logger.error(f"Error fetching order book from {exchange.id} for {symbol}: {str(e)}")
return None
def detect_whales(self, order_book):
"""Detect whale orders in the order book."""
if not order_book:
return []
whale_orders = []
# Get current time in UTC
current_time_utc = datetime.now().astimezone().astimezone(timezone.utc)
for bid in order_book['bids']:
if (len(bid) == 3):
price, amount, timestamp = bid
else:
price, amount = bid
value = price * amount
if value >= self.threshold:
whale_orders.append({
'type': 'bid',
'price': price,
'amount': amount,
'value_usd': value,
'timestamp': current_time_utc.isoformat()
})
for ask in order_book['asks']:
if (len(ask) == 3):
price, amount, timestamp = ask
else:
price, amount = ask
value = price * amount
if value >= self.threshold:
whale_orders.append({
'type': 'ask',
'price': price,
'amount': amount,
'value_usd': value,
'timestamp': current_time_utc.isoformat()
})
return whale_orders
def analyze_whale_activity(self):
"""Analyze the collected whale activity."""
self.logger.info("Analyzing whale activity")
if not self.large_orders:
return "No whale activity detected yet."
df = pd.DataFrame(self.large_orders)
# Basic analysis
bid_count = len(df[df['type'] == 'bid'])
ask_count = len(df[df['type'] == 'ask'])
total_bid_value = df[df['type'] == 'bid']['value_usd'].sum()
total_ask_value = df[df['type'] == 'ask']['value_usd'].sum()
analysis = {
'total_whales': len(df),
'buy_orders': bid_count,
'sell_orders': ask_count,
'buy_pressure': total_bid_value,
'sell_pressure': total_ask_value,
'net_pressure': total_bid_value - total_ask_value
}
return analysis
def save_data(self, filename='whale_activity.csv'):
"""Save the collected whale data to a CSV file."""
self.logger.info("Saving whale activity data to %s", filename)
if self.large_orders:
df = pd.DataFrame(self.large_orders)
df.to_csv(filename, index=False)
self.logger.info(f"Saved whale activity data to {filename}")
def log_alert(self, order):
"""Log the alert to the database and alert queue."""
session = self.Session()
alert = Alert(
exchange=order['exchange'],
order_type='BUY' if order['type'] == 'bid' else 'SELL',
amount=order['amount'],
price=order['price'],
value_usd=order['value_usd'],
timestamp=datetime.now()
)
session.add(alert)
session.commit()
session.close()
# Add to queue if available
if self.alert_queue is not None:
self.alert_queue.put(order)
def run(self):
"""Run the whale watcher continuously."""
self.logger.info("Running whale watcher")
if not self.exchanges:
# Connect to multiple exchanges
exchange_ids = ['binance', 'kraken', 'coinbase', 'bitfinex', 'huobi', 'kucoin', 'bybit', 'okx', 'gateio', 'mexc']
self.exchanges = self.connect_to_exchanges(exchange_ids)
if not self.exchanges:
self.logger.error("Cannot run whale watcher without connecting to any exchange")
return
self.is_running = True
self.logger.info(f"Starting whale watcher with threshold ${self.threshold:,}")
self.logger.info(f"Connected to {len(self.exchanges)} exchanges: {', '.join(self.exchanges.keys())}")
try:
while self.is_running:
for exchange_id, exchange in self.exchanges.items():
if self.symbol not in exchange.markets:
continue
order_book = self.fetch_order_book(exchange, self.symbol)
if not order_book:
continue
whale_orders = self.detect_whales(order_book)
if whale_orders:
for order in whale_orders:
order['exchange'] = exchange_id
self.large_orders.extend(whale_orders)
self.log_alert(order)
order_type = "BUY" if order['type'] == 'bid' else "SELL"
self.logger.info(
f"WHALE ALERT on {exchange_id}: {order_type} {order['amount']:.4f} BTC at ${order['price']:,.2f} " +
f"(${order['value_usd']:,.2f})"
)
time.sleep(self.check_interval)
except Exception as e:
self.logger.error(f"Error in run method: {str(e)}")
def stop(self):
"""Stop the whale watcher."""
self.logger.info("Stopping whale watcher...")
self.is_running = False
self.logger.info("Stopping whale watcher...")

View File

@@ -1,5 +1,6 @@
from enum import Enum
from transformers import pipeline
from finBERT.finbert.finbert import predict
from transformers import AutoModelForSequenceClassification
import ollama
from pydantic import BaseModel
import markdownify
@@ -24,7 +25,8 @@ class ArticleClassification(BaseModel):
class ArticleAnalyzer:
def __init__(self):
self.classifier = pipeline("text-classification", model="ProsusAI/finbert")
self.model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert", num_labels=3, cache_dir=None)
self.model.to("cuda")
self.base_prompt = """
Classify the following article into one of these categories:
- Regulatory News
@@ -74,5 +76,6 @@ class ArticleAnalyzer:
def classify_article_finbert(self, article_html):
article_md = self.convert_to_markdown(article_html)
result = self.classifier(article_md)
return result
results = predict(article_md, model=self.model, use_gpu=True)
return results

View File

@@ -5,7 +5,22 @@ from scipy.signal import find_peaks
import matplotlib.pyplot as plt
import matplotlib
from sklearn.linear_model import LinearRegression
from matplotlib.widgets import Slider
from sqlalchemy import create_engine, Column, Integer, String, Float, MetaData, Table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import OperationalError
Base = declarative_base()
class PriceExtreme(Base):
__tablename__ = 'price_extremes'
id = Column(Integer, primary_key=True, autoincrement=True)
timestamp = Column(String)
price = Column(Float)
type = Column(String)
prominence = Column(Float)
class BitcoinTrendAnalysis:
@@ -28,7 +43,6 @@ class BitcoinTrendAnalysis:
print("Failed to load data. DataFrame is empty or None.")
def adaptive_find_peaks(self, smooth_prices, window, factor, distance):
print(factor)
prominences = np.zeros_like(smooth_prices)
for i in range(len(smooth_prices)):
@@ -38,12 +52,11 @@ class BitcoinTrendAnalysis:
local_min = np.min(smooth_prices[start:end])
prominences[i] = (local_max - local_min) * factor
print(prominences)
peaks, _ = find_peaks(smooth_prices, prominence=prominences, distance=distance)
valleys, _ = find_peaks(-smooth_prices, prominence=prominences, distance=distance)
return peaks, valleys, prominences
def analyze_trends_peaks(self, resample_window='D', smoothing_window=10, prominence_factor=0.5, window=30,
def analyze_trends_peaks(self, resample_window='D', smoothing_window=1, prominence_factor=0.5, window=30,
distance=None):
matplotlib.use('TkAgg')
@@ -54,6 +67,7 @@ class BitcoinTrendAnalysis:
self.df = self.df.resample(resample_window).agg({'Close': 'last'})
prices = self.df['Close'].values
smooth_prices = pd.Series(prices).rolling(window=smoothing_window).mean()
print(f"Smooth prices: {len(smooth_prices)} vs prices {len(prices)}")
fig, ax = plt.subplots(figsize=(14, 7))
plt.subplots_adjust(bottom=0.25)
@@ -62,18 +76,65 @@ class BitcoinTrendAnalysis:
distance=distance)
ax.plot(self.df.index, smooth_prices, label='Bitcoin Smooth Price')
ax.plot(self.df.index, prices, label='Bitcoin Price')
ax.scatter(self.df.index[peaks], smooth_prices[peaks], color='green', s=100, marker='^', label='Local Maxima')
ax.scatter(self.df.index[valleys], smooth_prices[valleys], color='red', s=100, marker='v', label='Local Minima')
for peak, valley in zip(peaks, valleys):
ax.plot([self.df.index[peak], self.df.index[valley]], [smooth_prices[peak], smooth_prices[valley]],
color='orange', lw=1)
ax.set_title(f'Bitcoin Price Trends Analysis\nfactor={prominence_factor}')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)
engine = create_engine('sqlite:///databases/bitcoin_trends.db')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()
try:
session.query(PriceExtreme).delete()
except OperationalError as e:
print(f"Error occurred: {e}. The table may not exist.")
extremes_to_insert = []
with open(f'peaks_and_valleys_{resample_window}_{smoothing_window}_{prominence_factor}_{window}_{distance}.txt', 'w') as file:
for peak in peaks:
peak_date = self.df.index[peak].strftime('%Y-%m-%d %H:%M:%S')
peak_price = float(smooth_prices[peak])
peak_prominence = float(prominences[peak])
extremes_to_insert.append(
PriceExtreme(
timestamp=peak_date,
price=peak_price,
type='peak',
prominence=peak_prominence
)
)
file.write(f"Peak: {peak_date}, Price: {peak_price}, Prominence: {peak_prominence}\n")
for valley in valleys:
valley_date = self.df.index[valley].strftime('%Y-%m-%d %H:%M:%S')
valley_price = float(smooth_prices[valley])
valley_prominence = float(prominences[valley])
extremes_to_insert.append(
PriceExtreme(
timestamp=valley_date,
price=valley_price,
type='valley',
prominence=valley_prominence
)
)
file.write(f"Valley: {valley_date}, Price: {valley_price}, Prominence: {valley_prominence}\n")
session.bulk_save_objects(extremes_to_insert)
session.commit()
session.close()
print(f"Saved {len(peaks)} peaks and {len(valleys)} valleys to bitcoin_trends.db")
print("Peaks and valleys written to peaks_and_valleys.txt")
plt.show()

1
finBERT Submodule

Submodule finBERT added at 44995e0c58

View File

@@ -1,8 +1,11 @@
import os
import sys
from sqlalchemy import create_engine, Column, String, Float, MetaData, Table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from article_analyzer import ArticleAnalyzer
import nltk
import logging
Base = declarative_base()
@@ -25,9 +28,14 @@ def read_html_files(folder_path):
if __name__ == "__main__":
# nltk.set_proxy('http://127.0.0.1:7890')
# nltk.download('punkt_tab')
logging.basicConfig(level=logging.CRITICAL)
analyzer = ArticleAnalyzer()
engine = create_engine('sqlite:///article_analysis.db')
engine = create_engine('sqlite:///databases/article_analysis.db')
Session = sessionmaker(bind=engine)
session = Session()
@@ -35,30 +43,42 @@ if __name__ == "__main__":
print(f"Parsed {len(html_files)} html files")
Base.metadata.create_all(engine)
# result = analyzer.classify_article_finbert("Strong earning growth and expending market shares have positionned the company for long term success.")
# print(f'result {result}')
for file, content in html_files.items():
result = analyzer.classify_article_finbert(content)
chunk_size = 512
chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)]
results = []
for chunk in chunks:
if chunk.strip():
chunk_result = analyzer.classify_article_finbert(chunk)
results.extend(chunk_result)
result = results if results else [{'label': 'neutral', 'score': 0.0}]
filename = os.path.basename(file)
print(f'result {result}')
# label = result[0]['label']
# score = result[0]['score']
# analysis = ArticleAnalysis(filename=filename, label=label, score=score)
label = result[0]['label']
score = result[0]['score']
# try:
# session.add(analysis)
# session.commit()
# except:
# session.rollback()
analysis = ArticleAnalysis(filename=filename, label=label, score=score)
try:
session.add(analysis)
session.commit()
except:
session.rollback()
# existing = session.query(ArticleAnalysis).filter_by(filename=filename).first()
# if existing:
# existing.label = label
# existing.score = score
# session.commit()
# finally:
# session.close()
existing = session.query(ArticleAnalysis).filter_by(filename=filename).first()
if existing:
existing.label = label
existing.score = score
session.commit()
finally:
session.close()
print(f"article [{file}] - analyzed as [{result}]\n")
# print(f"article [{file}] - analyzed as [{result}]\n")

View File

@@ -1,8 +1,9 @@
from BitcoinPricePredictor import BitcoinPricePredictor
if __name__ == "__main__":
predictor = BitcoinPricePredictor(db_path='bitcoin_historical_data.db', timeframe='H')
predictor.load_data()
predictor = BitcoinPricePredictor(db_path='databases/bitcoin_historical_data.db', timeframe='H')
predictor.load_data_csv('./data/btcusd_daily_data.csv')
predictor.prepare_data()
predictor.train_model()
predictor.evaluate_model()
predictor.plot_history()

View File

@@ -1,6 +1,6 @@
from bitcoin_trend_analysis import BitcoinTrendAnalysis
if __name__ == "__main__":
ma = BitcoinTrendAnalysis(db_path='bitcoin_historical_data.db')
ma = BitcoinTrendAnalysis(db_path='databases/bitcoin_historical_data.db')
ma.load_data()
ma.analyze_trends_peaks(distance=1, prominence_factor=0.1)

27
main_whales_watcher.py Normal file
View File

@@ -0,0 +1,27 @@
from WhalesWatcher import WhalesWatcher
from ChartView import ChartView
import threading
import queue
def start_whales_watcher(alert_queue):
whales_watcher = WhalesWatcher(alert_queue)
whales_watcher.run()
if __name__ == "__main__":
# Create a queue for passing whale alerts between threads
whale_alert_queue = queue.Queue()
# Start WhalesWatcher in a separate thread
watcher_thread = threading.Thread(target=start_whales_watcher, args=(whale_alert_queue,))
watcher_thread.daemon = True
# Start the watcher thread
watcher_thread.start()
# Define exchanges for the chart
exchange_ids = ['binance', 'kraken', 'coinbase', 'bitfinex', 'huobi', 'kucoin', 'bybit', 'okx', 'gateio', 'mexc']
# Start the chart viewer with alert queue
chart_view = ChartView(exchange_ids, alert_queue=whale_alert_queue)
# Run the GUI (this will block until the window is closed)
chart_view.start_gui()

View File

@@ -4,7 +4,7 @@ from sklearn.metrics import confusion_matrix
if __name__ == "__main__":
model = load_model('models/model_2025-01-21_04-49-43.h5')
predictor = BitcoinPricePredictor(model=model, db_path='bitcoin_historical_data.db')
predictor = BitcoinPricePredictor(model=model, db_path='databases/bitcoin_historical_data.db')
missing_data = predictor.load_new_data_from_model()

1903
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,2 +0,0 @@
[virtualenvs]
in-project = false

View File

@@ -1,30 +0,0 @@
[project]
name = "cryptomarketparser"
version = "0.1.0"
description = ""
authors = [
{name = "Simon Moisy",email = "simon.moisy@tutanota.com"}
]
readme = "README.md"
requires-python = ">=3.10,<4.0"
dependencies = [
"numpy (>=1.26.0,<2.2.0)",
"pandas (>=2.2.3,<3.0.0)",
"sqlalchemy (>=2.0.39,<3.0.0)",
"scipy (>=1.15.2,<2.0.0)",
"matplotlib (>=3.10.1,<4.0.0)",
"scikit-learn (>=1.6.1,<2.0.0)",
"ollama (>=0.4.7,<0.5.0)",
"transformers (>=4.49.0,<5.0.0)",
"markdownify (>=1.1.0,<2.0.0)"
]
[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"
[[tool.poetry.source]]
name = "pytorch"
url = "https://download.pytorch.org/whl/cu121"
priority = "explicit"

BIN
requirements.txt Normal file

Binary file not shown.

676
trend_detector_simple.py Normal file
View File

@@ -0,0 +1,676 @@
import pandas as pd
import numpy as np
import logging
from scipy.signal import find_peaks
from matplotlib.patches import Rectangle
from scipy import stats
from scipy import stats
# Color configuration
# Plot colors
DARK_BG_COLOR = '#181C27'
LEGEND_BG_COLOR = '#333333'
TITLE_COLOR = 'white'
AXIS_LABEL_COLOR = 'white'
# Candlestick colors
CANDLE_UP_COLOR = '#089981' # Green
CANDLE_DOWN_COLOR = '#F23645' # Red
# Marker colors
MIN_COLOR = 'red'
MAX_COLOR = 'green'
# Line style colors
MIN_LINE_STYLE = 'g--' # Green dashed
MAX_LINE_STYLE = 'r--' # Red dashed
SMA7_LINE_STYLE = 'y-' # Yellow solid
SMA15_LINE_STYLE = 'm-' # Magenta solid
# SuperTrend colors
ST_COLOR_UP = 'g-'
ST_COLOR_DOWN = 'r-'
class TrendDetectorSimple:
def __init__(self, data, verbose=False):
"""
Initialize the TrendDetectorSimple class.
Parameters:
- data: pandas DataFrame containing price data
- verbose: boolean, whether to display detailed logging information
"""
self.data = data
self.verbose = verbose
# Plot style configuration
self.plot_style = 'dark_background'
self.bg_color = DARK_BG_COLOR
self.plot_size = (12, 8)
# Candlestick configuration
self.candle_width = 0.6
self.candle_up_color = CANDLE_UP_COLOR
self.candle_down_color = CANDLE_DOWN_COLOR
self.candle_alpha = 0.8
self.wick_width = 1
# Marker configuration
self.min_marker = '^'
self.min_color = MIN_COLOR
self.min_size = 100
self.max_marker = 'v'
self.max_color = MAX_COLOR
self.max_size = 100
self.marker_zorder = 100
# Line configuration
self.line_width = 1
self.min_line_style = MIN_LINE_STYLE
self.max_line_style = MAX_LINE_STYLE
self.sma7_line_style = SMA7_LINE_STYLE
self.sma15_line_style = SMA15_LINE_STYLE
# Text configuration
self.title_size = 14
self.title_color = TITLE_COLOR
self.axis_label_size = 12
self.axis_label_color = AXIS_LABEL_COLOR
# Legend configuration
self.legend_loc = 'best'
self.legend_bg_color = LEGEND_BG_COLOR
# Configure logging
logging.basicConfig(level=logging.INFO if verbose else logging.WARNING,
format='%(asctime)s - %(levelname)s - %(message)s')
self.logger = logging.getLogger('TrendDetectorSimple')
# Convert data to pandas DataFrame if it's not already
if not isinstance(self.data, pd.DataFrame):
if isinstance(self.data, list):
self.data = pd.DataFrame({'close': self.data})
else:
raise ValueError("Data must be a pandas DataFrame or a list")
self.logger.info(f"Initialized TrendDetectorSimple with {len(self.data)} data points")
def calculate_tr(self):
"""
Calculate True Range (TR) for the price data.
True Range is the greatest of:
1. Current high - current low
2. |Current high - previous close|
3. |Current low - previous close|
Returns:
- Numpy array of TR values
"""
df = self.data.copy()
high = df['high'].values
low = df['low'].values
close = df['close'].values
tr = np.zeros_like(close)
tr[0] = high[0] - low[0] # First TR is just the first day's range
for i in range(1, len(close)):
# Current high - current low
hl_range = high[i] - low[i]
# |Current high - previous close|
hc_range = abs(high[i] - close[i-1])
# |Current low - previous close|
lc_range = abs(low[i] - close[i-1])
# TR is the maximum of these three values
tr[i] = max(hl_range, hc_range, lc_range)
return tr
def calculate_atr(self, period=14):
"""
Calculate Average True Range (ATR) for the price data.
ATR is the exponential moving average of the True Range over a specified period.
Parameters:
- period: int, the period for the ATR calculation (default: 14)
Returns:
- Numpy array of ATR values
"""
tr = self.calculate_tr()
atr = np.zeros_like(tr)
# First ATR value is just the first TR
atr[0] = tr[0]
# Calculate exponential moving average (EMA) of TR
multiplier = 2.0 / (period + 1)
for i in range(1, len(tr)):
atr[i] = (tr[i] * multiplier) + (atr[i-1] * (1 - multiplier))
return atr
def detect_trends(self):
"""
Detect trends by identifying local minima and maxima in the price data
using scipy.signal.find_peaks.
Parameters:
- prominence: float, required prominence of peaks (relative to the price range)
- width: int, required width of peaks in data points
Returns:
- DataFrame with columns for timestamps, prices, and trend indicators
- Dictionary containing analysis results including linear regression, SMAs, and SuperTrend indicators
"""
df = self.data.copy()
close_prices = df['close'].values
# Find peaks in the price data
max_peaks, _ = find_peaks(close_prices)
min_peaks, _ = find_peaks(-close_prices)
# Create boolean columns for min and max peaks using vectorized operations
df['is_max'] = False
df['is_min'] = False
df.iloc[max_peaks, df.columns.get_loc('is_max')] = True
df.iloc[min_peaks, df.columns.get_loc('is_min')] = True
result = df[['datetime', 'close', 'is_min', 'is_max']].copy()
# Perform linear regression on min_peaks and max_peaks
min_prices = df['close'].iloc[min_peaks].values
max_prices = df['close'].iloc[max_peaks].values
# Linear regression for min peaks if we have at least 2 points
min_slope, min_intercept, min_r_value, _, _ = stats.linregress(min_peaks, min_prices)
# Linear regression for max peaks if we have at least 2 points
max_slope, max_intercept, max_r_value, _, _ = stats.linregress(max_peaks, max_prices)
# Calculate Simple Moving Averages (SMA) for 7 and 15 periods
sma_7 = pd.Series(close_prices).rolling(window=7, min_periods=1).mean().values
sma_15 = pd.Series(close_prices).rolling(window=15, min_periods=1).mean().values
analysis_results = {}
analysis_results['linear_regression'] = {
'min': {
'slope': min_slope,
'intercept': min_intercept,
'r_squared': min_r_value ** 2
},
'max': {
'slope': max_slope,
'intercept': max_intercept,
'r_squared': max_r_value ** 2
}
}
analysis_results['sma'] = {
'7': sma_7,
'15': sma_15
}
# Calculate SuperTrend indicators
supertrend_results_list = self._calculate_supertrend_indicators()
meta_results = self.calculate_metasupertrend(df, supertrend_results_list)
analysis_results['supertrend'] = supertrend_results_list
analysis_results['metasupertrend'] = meta_results
return result, analysis_results
def _calculate_supertrend_indicators(self):
"""
Calculate SuperTrend indicators with different parameter sets.
Returns:
- list, the SuperTrend results
"""
supertrend_params = [
{"period": 12, "multiplier": 3.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN},
{"period": 10, "multiplier": 1.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN},
{"period": 11, "multiplier": 2.0, "color_up": ST_COLOR_UP, "color_down": ST_COLOR_DOWN}
]
supertrend_results_list = []
for params in supertrend_params:
supertrend_results = self.calculate_supertrend(
period=params["period"],
multiplier=params["multiplier"]
)
supertrend_results_list.append({
"results": supertrend_results,
"params": params
})
return supertrend_results_list
def calculate_supertrend(self, period, multiplier):
"""
Calculate SuperTrend indicator for the price data.
SuperTrend is a trend-following indicator that uses ATR to determine the trend direction.
Parameters:
- period: int, the period for the ATR calculation (default: 10)
- multiplier: float, the multiplier for the ATR (default: 3.0)
Returns:
- Dictionary containing SuperTrend values, trend direction, and upper/lower bands
"""
df = self.data.copy()
high = df['high'].values
low = df['low'].values
close = df['close'].values
# Calculate ATR
atr = self.calculate_atr(period)
# Calculate basic upper and lower bands
upper_band = np.zeros_like(close)
lower_band = np.zeros_like(close)
for i in range(len(close)):
# Calculate the basic bands
hl_avg = (high[i] + low[i]) / 2
upper_band[i] = hl_avg + (multiplier * atr[i])
lower_band[i] = hl_avg - (multiplier * atr[i])
# Calculate final upper and lower bands with trend logic
final_upper = np.zeros_like(close)
final_lower = np.zeros_like(close)
supertrend = np.zeros_like(close)
trend = np.zeros_like(close) # 1 for uptrend, -1 for downtrend
# Initialize first values
final_upper[0] = upper_band[0]
final_lower[0] = lower_band[0]
# If close price is above upper band, we're in a downtrend (ST = upper band)
# If close price is below lower band, we're in an uptrend (ST = lower band)
if close[0] <= upper_band[0]:
supertrend[0] = upper_band[0]
trend[0] = -1 # Downtrend
else:
supertrend[0] = lower_band[0]
trend[0] = 1 # Uptrend
# Calculate SuperTrend for the rest of the data
for i in range(1, len(close)):
# Calculate final upper band
if (upper_band[i] < final_upper[i-1]) or (close[i-1] > final_upper[i-1]):
final_upper[i] = upper_band[i]
else:
final_upper[i] = final_upper[i-1]
# Calculate final lower band
if (lower_band[i] > final_lower[i-1]) or (close[i-1] < final_lower[i-1]):
final_lower[i] = lower_band[i]
else:
final_lower[i] = final_lower[i-1]
# Determine trend and SuperTrend value
if supertrend[i-1] == final_upper[i-1] and close[i] <= final_upper[i]:
# Continuing downtrend
supertrend[i] = final_upper[i]
trend[i] = -1
elif supertrend[i-1] == final_upper[i-1] and close[i] > final_upper[i]:
# Switching to uptrend
supertrend[i] = final_lower[i]
trend[i] = 1
elif supertrend[i-1] == final_lower[i-1] and close[i] >= final_lower[i]:
# Continuing uptrend
supertrend[i] = final_lower[i]
trend[i] = 1
elif supertrend[i-1] == final_lower[i-1] and close[i] < final_lower[i]:
# Switching to downtrend
supertrend[i] = final_upper[i]
trend[i] = -1
# Prepare result
supertrend_results = {
'supertrend': supertrend,
'trend': trend,
'upper_band': final_upper,
'lower_band': final_lower
}
return supertrend_results
def plot_trends(self, trend_data, analysis_results, view="both"):
"""
Plot the price data with detected trends using a candlestick chart.
Also plots SuperTrend indicators with three different parameter sets.
Parameters:
- trend_data: DataFrame, the output from detect_trends()
- analysis_results: Dictionary containing analysis results from detect_trends()
- view: str, one of 'both', 'trend', 'supertrend'; determines which plot(s) to display
Returns:
- None (displays the plot)
"""
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
plt.style.use(self.plot_style)
if view == "both":
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(self.plot_size[0]*2, self.plot_size[1]))
else:
fig, ax = plt.subplots(figsize=self.plot_size)
ax1 = ax2 = None
if view == "trend":
ax1 = ax
elif view == "supertrend":
ax2 = ax
fig.patch.set_facecolor(self.bg_color)
if ax1: ax1.set_facecolor(self.bg_color)
if ax2: ax2.set_facecolor(self.bg_color)
df = self.data.copy()
if ax1:
self._plot_trend_analysis(ax1, df, trend_data, analysis_results)
if ax2:
self._plot_supertrend_analysis(ax2, df, analysis_results)
plt.tight_layout()
plt.show()
def _plot_candlesticks(self, ax, df):
"""
Plot candlesticks on the given axis.
Parameters:
- ax: matplotlib.axes.Axes, the axis to plot on
- df: pandas.DataFrame, the data to plot
"""
from matplotlib.patches import Rectangle
for i in range(len(df)):
# Get OHLC values for this candle
open_val = df['open'].iloc[i]
close_val = df['close'].iloc[i]
high_val = df['high'].iloc[i]
low_val = df['low'].iloc[i]
# Determine candle color
color = self.candle_up_color if close_val >= open_val else self.candle_down_color
# Plot candle body
body_height = abs(close_val - open_val)
bottom = min(open_val, close_val)
rect = Rectangle((i - self.candle_width/2, bottom), self.candle_width, body_height,
color=color, alpha=self.candle_alpha)
ax.add_patch(rect)
# Plot candle wicks
ax.plot([i, i], [low_val, high_val], color=color, linewidth=self.wick_width)
def _plot_trend_analysis(self, ax, df, trend_data, analysis_results):
"""
Plot trend analysis on the given axis.
Parameters:
- ax: matplotlib.axes.Axes, the axis to plot on
- df: pandas.DataFrame, the data to plot
- trend_data: pandas.DataFrame, the trend data
- analysis_results: dict, the analysis results
"""
# Draw candlesticks
self._plot_candlesticks(ax, df)
# Plot minima and maxima points
self._plot_min_max_points(ax, df, trend_data)
# Plot trend lines and moving averages
if analysis_results:
self._plot_trend_lines(ax, df, analysis_results)
# Configure the subplot
self._configure_subplot(ax, 'Price Chart with Trend Analysis', len(df))
def _plot_min_max_points(self, ax, df, trend_data):
"""
Plot minimum and maximum points on the given axis.
Parameters:
- ax: matplotlib.axes.Axes, the axis to plot on
- df: pandas.DataFrame, the data to plot
- trend_data: pandas.DataFrame, the trend data
"""
min_indices = trend_data.index[trend_data['is_min'] == True].tolist()
if min_indices:
min_y = [df['close'].iloc[i] for i in min_indices]
ax.scatter(min_indices, min_y, color=self.min_color, s=self.min_size,
marker=self.min_marker, label='Local Minima', zorder=self.marker_zorder)
max_indices = trend_data.index[trend_data['is_max'] == True].tolist()
if max_indices:
max_y = [df['close'].iloc[i] for i in max_indices]
ax.scatter(max_indices, max_y, color=self.max_color, s=self.max_size,
marker=self.max_marker, label='Local Maxima', zorder=self.marker_zorder)
def _plot_trend_lines(self, ax, df, analysis_results):
"""
Plot trend lines on the given axis.
Parameters:
- ax: matplotlib.axes.Axes, the axis to plot on
- df: pandas.DataFrame, the data to plot
- analysis_results: dict, the analysis results
"""
x_vals = np.arange(len(df))
# Minima regression line (support)
min_slope = analysis_results['linear_regression']['min']['slope']
min_intercept = analysis_results['linear_regression']['min']['intercept']
min_line = min_slope * x_vals + min_intercept
ax.plot(x_vals, min_line, self.min_line_style, linewidth=self.line_width,
label='Minima Regression')
# Maxima regression line (resistance)
max_slope = analysis_results['linear_regression']['max']['slope']
max_intercept = analysis_results['linear_regression']['max']['intercept']
max_line = max_slope * x_vals + max_intercept
ax.plot(x_vals, max_line, self.max_line_style, linewidth=self.line_width,
label='Maxima Regression')
# SMA-7 line
sma_7 = analysis_results['sma']['7']
ax.plot(x_vals, sma_7, self.sma7_line_style, linewidth=self.line_width,
label='SMA-7')
# SMA-15 line
sma_15 = analysis_results['sma']['15']
valid_idx_15 = ~np.isnan(sma_15)
ax.plot(x_vals[valid_idx_15], sma_15[valid_idx_15], self.sma15_line_style,
linewidth=self.line_width, label='SMA-15')
def _configure_subplot(self, ax, title, data_length):
"""
Configure the subplot with title, labels, limits, and legend.
Parameters:
- ax: matplotlib.axes.Axes, the axis to configure
- title: str, the title of the subplot
- data_length: int, the length of the data
"""
# Set title and labels
ax.set_title(title, fontsize=self.title_size, color=self.title_color)
ax.set_xlabel('Date', fontsize=self.axis_label_size, color=self.axis_label_color)
ax.set_ylabel('Price', fontsize=self.axis_label_size, color=self.axis_label_color)
# Set appropriate x-axis limits
ax.set_xlim(-0.5, data_length - 0.5)
# Add a legend
ax.legend(loc=self.legend_loc, facecolor=self.legend_bg_color)
def _plot_supertrend_analysis(self, ax, df, analysis_results=None):
"""
Plot SuperTrend analysis on the given axis.
Parameters:
- ax: matplotlib.axes.Axes, the axis to plot on
- df: pandas.DataFrame, the data to plot
- supertrend_results_list: list, the SuperTrend results (optional)
"""
self._plot_candlesticks(ax, df)
self._plot_supertrend_lines(ax, df, analysis_results['supertrend'], style='Both')
self._configure_subplot(ax, 'Multiple SuperTrend Indicators', len(df))
def _plot_supertrend_lines(self, ax, df, analysis_results, style="Horizontal"):
"""
Plot SuperTrend lines on the given axis.
Parameters:
- ax: matplotlib.axes.Axes, the axis to plot on
- df: pandas.DataFrame, the data to plot
- supertrend_results_list: list, the SuperTrend results
"""
x_vals = np.arange(len(df))
if style == 'Horizontal' or style == 'Both':
if len(analysis_results) != 3:
raise ValueError("Expected exactly 3 SuperTrend results for meta calculation")
trends = [st["results"]["trend"] for st in analysis_results]
band_height = 0.02 * (df["high"].max() - df["low"].min())
y_base = df["low"].min() - band_height * 1.5
prev_color = None
for i in range(1, len(x_vals)):
t_vals = [t[i] for t in trends]
up_count = t_vals.count(1)
down_count = t_vals.count(-1)
if down_count == 3:
color = "red"
elif down_count == 2 and up_count == 1:
color = "orange"
elif down_count == 1 and up_count == 2:
color = "yellow"
elif up_count == 3:
color = "green"
else:
continue # skip if unknown or inconsistent values
ax.add_patch(Rectangle(
(x_vals[i-1], y_base),
1,
band_height,
color=color,
linewidth=0,
alpha=0.6
))
# Draw a vertical line at the change of color
if prev_color and prev_color != color:
ax.axvline(x_vals[i-1], color="grey", alpha=0.3, linewidth=1)
prev_color = color
ax.set_ylim(bottom=y_base - band_height * 0.5)
if style == 'Curves' or style == 'Both':
for st in analysis_results:
params = st["params"]
results = st["results"]
supertrend = results["supertrend"]
trend = results["trend"]
# Plot SuperTrend line with color based on trend
for i in range(1, len(x_vals)):
if trend[i] == 1: # Uptrend
ax.plot(x_vals[i-1:i+1], supertrend[i-1:i+1], params["color_up"], linewidth=self.line_width)
else: # Downtrend
ax.plot(x_vals[i-1:i+1], supertrend[i-1:i+1], params["color_down"], linewidth=self.line_width)
self._plot_metasupertrend_lines(ax, df, analysis_results)
self._add_supertrend_legend(ax, analysis_results)
def _plot_metasupertrend_lines(self, ax, df, analysis_results):
"""
Plot a Meta SuperTrend line where all individual SuperTrends agree on trend.
Parameters:
- ax: matplotlib.axes.Axes, the axis to plot on
- df: pandas.DataFrame, the data to plot
- supertrend_results_list: list, each item contains SuperTrend 'results' and 'params'
"""
x_vals = np.arange(len(df))
meta_results = self.calculate_metasupertrend(df, analysis_results)
params = analysis_results[0]["params"] # Use first config for styling
for i in meta_results['consensus_points']:
if i > 0: # Skip first point as we need a previous point to draw a line
if i-1 in meta_results['consensus_points']: # Only draw if previous point was also a consensus
meta_trend = meta_results['meta_trends'][i]
color = params["color_up"] if meta_trend == 1 else params["color_down"]
ax.plot(x_vals[i-1:i+1],
[meta_results['meta_values'][i-1], meta_results['meta_values'][i]],
color, linewidth=self.line_width)
def _add_supertrend_legend(self, ax, supertrend_results_list):
"""
Add SuperTrend legend entries to the given axis.
Parameters:
- ax: matplotlib.axes.Axes, the axis to add legend entries to
- supertrend_results_list: list, the SuperTrend results
"""
for st in supertrend_results_list:
params = st["params"]
period = params["period"]
multiplier = params["multiplier"]
color_up = params["color_up"]
color_down = params["color_down"]
ax.plot([], [], color_up, linewidth=self.line_width,
label=f'ST (P:{period}, M:{multiplier}) Up')
ax.plot([], [], color_down, linewidth=self.line_width,
label=f'ST (P:{period}, M:{multiplier}) Down')
def calculate_metasupertrend(self, df, supertrend_results_list):
"""
Calculate Meta SuperTrend values where all individual SuperTrends agree on trend.
Parameters:
- df: pandas.DataFrame, the data containing price information
- supertrend_results_list: list, each item contains SuperTrend 'results' and 'params'
Returns:
- dict containing:
- meta_trends: list of trend values (1 for uptrend, -1 for downtrend, 0 for no consensus)
- meta_values: list of averaged supertrend values where trends agree
- consensus_points: list of indices where all trends agree
"""
if len(supertrend_results_list) != 3:
raise ValueError("Expected exactly 3 SuperTrend results for meta calculation")
trends = [st["results"]["trend"] for st in supertrend_results_list]
supertrends = [st["results"]["supertrend"] for st in supertrend_results_list]
data_length = len(df)
meta_trends = np.zeros(data_length) # 0 means no consensus
meta_values = np.zeros(data_length)
consensus_points = []
for i in range(1, data_length):
t1, t2, t3 = trends[0][i], trends[1][i], trends[2][i]
if t1 == t2 == t3:
meta_trends[i] = t1
meta_values[i] = np.mean([s[i] for s in supertrends])
consensus_points.append(i)
return {
'meta_trends': meta_trends,
'meta_values': meta_values,
'consensus_points': consensus_points
}

View File

@@ -0,0 +1,48 @@
import pandas as pd
from datetime import datetime
def aggregate_to_daily(input_csv, output_csv):
"""
Load a CSV file with Bitcoin price data, aggregate to daily values, and save to a new CSV.
Args:
input_csv (str): Path to the input CSV file
output_csv (str): Path to save the output CSV file
"""
try:
# Read the CSV file
print(f"Loading data from {input_csv}...")
df = pd.read_csv(input_csv)
# Convert timestamp to datetime
df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')
df.set_index('Timestamp', inplace=True)
# Aggregate to daily values
print("Aggregating data to daily values...")
daily_df = df.resample('D').agg({
'Open': 'first',
'High': 'max',
'Low': 'min',
'Close': 'last',
'Volume': 'sum'
})
# Reset index to make Timestamp a column
daily_df.reset_index(inplace=True)
# Save to new CSV
print(f"Saving daily data to {output_csv}...")
daily_df.to_csv(output_csv, index=False)
print(f"Successfully processed {len(df)} records into {len(daily_df)} daily records")
except Exception as e:
print(f"Error processing data: {str(e)}")
if __name__ == "__main__":
# Example usage
input_file = "../data/btcusd_1-min_data.csv" # Update this path to your input file
output_file = "../data/btcusd_daily_data.csv" # Update this path to your desired output file
aggregate_to_daily(input_file, output_file)

View File

@@ -2,7 +2,7 @@ import sqlite3
from datetime import datetime
# Specify the database file path
db_path = 'bitcoin_historical_data.db'
db_path = 'databases/bitcoin_historical_data.db'
# Create a connection to the database
connection = sqlite3.connect(db_path)

View File

@@ -12,7 +12,7 @@ df.set_index('Timestamp', inplace=True)
df = df[~df.index.isna()]
# Create a connection to the SQLite database
engine = create_engine('sqlite:///bitcoin_historical_data.db')
engine = create_engine('sqlite:///databases/bitcoin_historical_data.db')
# Check if the table already exists and get the last timestamp from the database
with engine.connect() as connection: