From a1cd2a0b06e99a8c669933bf9fa437f469705266 Mon Sep 17 00:00:00 2001
From: Simon Moisy
Date: Tue, 18 Mar 2025 10:55:35 +0800
Subject: [PATCH] first attempt with an article analyzer using ollama and (structured output)

---
 article_analyzer.py      | 79 ++++++++++++++++++++++++++++++++++++++++
 main.py                  | 16 --------
 main_article_analyzer.py | 37 +++++++++++++++++++
 main_price_predictor.py  | 10 +++++
 main_trend_analysis.py   |  6 +++
 5 files changed, 132 insertions(+), 16 deletions(-)
 create mode 100644 article_analyzer.py
 delete mode 100644 main.py
 create mode 100644 main_article_analyzer.py
 create mode 100644 main_price_predictor.py
 create mode 100644 main_trend_analysis.py

diff --git a/article_analyzer.py b/article_analyzer.py
new file mode 100644
index 0000000..bbef80f
--- /dev/null
+++ b/article_analyzer.py
@@ -0,0 +1,79 @@
+"""Classify news articles with an Ollama chat model and structured output."""
+
+from enum import Enum
+from typing import Literal
+
+import ollama
+from pydantic import BaseModel
+
+
+class Category(str, Enum):
+    """Closed set of categories an article can be assigned to."""
+
+    REGULATORY_NEWS = "Regulatory News"
+    INSTITUTIONAL_ADOPTION = "Institutional Adoption"
+    MARKET_SENTIMENT = "Market Sentiment"
+    MACROECONOMIC_FACTORS = "Macroeconomic Factors"
+    SECURITY_HACKS = "Security & Hacks"
+    TECHNOLOGICAL_DEVELOPMENTS = "Technological Developments"
+    WHALE_EXCHANGE_ACTIVITY = "Whale & Exchange Activity"
+
+
+class ArticleClassification(BaseModel):
+    """Shape of the JSON reply requested from the model."""
+
+    category: Category
+    # Only the three values named in the prompt are valid, so encode that in
+    # the schema instead of accepting any integer.
+    sentiment: Literal[-1, 0, 1]
+
+
+class ArticleAnalyzer:
+    """Classify an article's category and sentiment through ollama.chat."""
+
+    def __init__(self, model="llama3.2"):
+        """Prepare the base prompt and remember which model to query.
+
+        Args:
+            model: Name of the Ollama model passed to ollama.chat.
+        """
+        self.model = model
+        self.base_prompt = """
+        Classify the following article into one of these categories:
+        - Regulatory News
+        - Institutional Adoption
+        - Market Sentiment
+        - Macroeconomic Factors
+        - Security & Hacks
+        - Technological Developments
+        - Whale & Exchange Activity
+
+        Also, assign a sentiment (1 for Positive, -1 for Negative, or 0 for Neutral).
+        """
+        print(f"This JSON model is going to be used for structured ouput classification : {ArticleClassification.model_json_schema()}")
+
+    def classify_article(self, article_text):
+        """Classify one article and return the model's raw JSON reply.
+
+        Args:
+            article_text: Text of the article to classify.
+
+        Returns:
+            The reply content as a JSON string constrained by the
+            ArticleClassification schema; parse it with
+            ArticleClassification.model_validate_json() if needed.
+        """
+        # `format` forces a JSON reply, so ask for JSON here instead of a
+        # free-text "Category:/Sentiment:" layout that contradicts it.
+        prompt = f"""{self.base_prompt}
+
+        ARTICLE: {article_text}
+
+        Return the result as JSON.
+        """
+        response = ollama.chat(
+            model=self.model,
+            messages=[{"role": "user", "content": prompt}],
+            format=ArticleClassification.model_json_schema(),
+        )
+        return response['message']['content']
diff --git a/main.py b/main.py
deleted file mode 100644
index 01db0d2..0000000
--- a/main.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from BitcoinPricePredictor import BitcoinPricePredictor
-
-if __name__ == "__main__":
-    # For daily predictions (default)
-    predictor_daily = BitcoinPricePredictor(db_path='bitcoin_historical_data.db', timeframe='H')
-
-    # For weekly predictions
-    # predictor_weekly = BitcoinPricePredictor(db_path='bitcoin_historical_data.db', timeframe='W')
-
-    # Choose which predictor to use
-    predictor = predictor_daily
-
-    predictor.load_and_prepare_data()
-    predictor.train_model()
-    predictor.evaluate_model()
-    predictor.plot_history()
diff --git a/main_article_analyzer.py b/main_article_analyzer.py
new file mode 100644
index 0000000..a13a9d8
--- /dev/null
+++ b/main_article_analyzer.py
@@ -0,0 +1,37 @@
+"""Classify every HTML article found under ./data and print the results."""
+
+import os
+
+from article_analyzer import ArticleAnalyzer
+
+
+def read_html_files(folder_path):
+    """Read every .html file below folder_path.
+
+    Args:
+        folder_path: Directory that is walked recursively.
+
+    Returns:
+        Dict mapping each file's path to its content decoded as UTF-8.
+    """
+    html_contents = {}
+    for root, _, files in os.walk(folder_path):
+        for file in files:
+            if file.endswith(".html"):
+                file_path = os.path.join(root, file)
+                with open(file_path, "r", encoding="utf-8") as f:
+                    html_contents[file_path] = f.read()
+    return html_contents
+
+
+if __name__ == "__main__":
+    analyzer = ArticleAnalyzer()
+
+    html_files = read_html_files("./data")
+    print(f"Parsed {len(html_files)} html files")
+
+    # NOTE(review): the raw HTML markup is sent to the model as-is; extracting
+    # the article text first would likely give cleaner prompts.
+    for file, content in html_files.items():
+        result = analyzer.classify_article(content)
+        print(f"article [{file}] - analyzed as [{result}]\n")
diff --git a/main_price_predictor.py b/main_price_predictor.py
new file mode 100644
index 0000000..d5bf9dc
--- /dev/null
+++ b/main_price_predictor.py
@@ -0,0 +1,10 @@
+from BitcoinPricePredictor import BitcoinPricePredictor
+
+if __name__ == "__main__":
+    predictor = BitcoinPricePredictor(db_path='bitcoin_historical_data.db', timeframe='H')
+    # NOTE(review): the deleted main.py called load_and_prepare_data(); confirm
+    # BitcoinPricePredictor actually exposes load_data().
+    predictor.load_data()
+    predictor.train_model()
+    predictor.evaluate_model()
+    predictor.plot_history()
diff --git a/main_trend_analysis.py b/main_trend_analysis.py
new file mode 100644
index 0000000..887732f
--- /dev/null
+++ b/main_trend_analysis.py
@@ -0,0 +1,6 @@
+from bitcoin_trend_analysis import BitcoinTrendAnalysis
+
+if __name__ == "__main__":
+    ma = BitcoinTrendAnalysis(db_path='bitcoin_historical_data.db')
+    ma.load_data()
+    ma.analyze_trends_peaks(distance=1)