First attempt at an article analyzer using Ollama and structured output

This commit is contained in:
Simon Moisy 2025-03-18 10:55:35 +08:00
parent 6d9189d0be
commit a1cd2a0b06
5 changed files with 85 additions and 16 deletions

47
article_analyzer.py Normal file
View File

@ -0,0 +1,47 @@
from enum import Enum
import ollama
from pydantic import BaseModel
# Closed set of topics an article can be classified into.
# Members are also plain strings (str mixin), so each one compares equal to
# its human-readable label.
# NOTE: documented with comments rather than a docstring on purpose -- the
# class is embedded in a generated JSON schema and a docstring could end up
# in that schema.
class Category(str, Enum):
    # Laws, regulation and government action.
    REGULATORY_NEWS = "Regulatory News"
    INSTITUTIONAL_ADOPTION = "Institutional Adoption"

    # Mood of the market and the wider economy.
    MARKET_SENTIMENT = "Market Sentiment"
    MACROECONOMIC_FACTORS = "Macroeconomic Factors"

    # Incidents and technology.
    SECURITY_HACKS = "Security & Hacks"
    TECHNOLOGICAL_DEVELOPMENTS = "Technological Developments"

    # Large holders and exchange flows.
    WHALE_EXCHANGE_ACTIVITY = "Whale & Exchange Activity"
# Structured result expected back from the model for a single article.
# NOTE: kept docstring-free on purpose -- the JSON schema generated from this
# model is passed to the LLM, and a class docstring could alter that schema.
class ArticleClassification(BaseModel):
    # Topic bucket; must be one of the Category members.
    category: Category

    # Sentiment score. The prompt asks for 1 (positive), -1 (negative) or
    # 0 (neutral), but the type itself only enforces "integer".
    sentiment: int
class ArticleAnalyzer:
    """Classify articles by topic and sentiment with an Ollama chat model.

    The model is constrained to the JSON schema generated from
    ``ArticleClassification``; the prompt lists the allowed categories and
    the sentiment scale.
    """

    def __init__(self, model="llama3.2"):
        """Prepare the shared prompt preamble.

        Args:
            model: Name of the Ollama model used for classification.
                Defaults to ``"llama3.2"``, the model previously hard-coded.
        """
        self.model = model

        # Derive the bullet list from the enum so the prompt cannot drift
        # from the schema the model is constrained to.
        category_lines = "\n".join(f"- {category.value}" for category in Category)
        self.base_prompt = (
            "\n"
            "Classify the following article into one of these categories:\n"
            f"{category_lines}\n"
            "Also, assign a sentiment (1 for Positive, -1 for Negative, or 0 for Neutral).\n"
        )
        print(f"This JSON model is going to be used for structured output classification : {ArticleClassification.model_json_schema()}")

    def classify_article(self, article_text):
        """Ask the model to classify one article.

        Args:
            article_text: Raw text of the article to classify.

        Returns:
            The message content returned by Ollama, as a string. Because the
            request passes the ``ArticleClassification`` schema as ``format``,
            this is expected to be JSON with ``category`` and ``sentiment``
            keys; callers wanting a typed object can parse it with
            ``ArticleClassification.model_validate_json``.
        """
        # The output instruction asks for JSON so it agrees with the
        # schema-constrained ``format`` below (the previous free-text
        # "Category: ... / Sentiment: ..." template contradicted it).
        prompt = (
            f"{self.base_prompt}\n"
            f"ARTICLE: {article_text}\n"
            "OUTPUT FORMAT:\n"
            'Respond with a JSON object containing the keys "category" and "sentiment".\n'
        )
        response = ollama.chat(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            format=ArticleClassification.model_json_schema(),
            # Temperature 0 keeps classification of the same text repeatable.
            options={"temperature": 0},
        )
        return response["message"]["content"]

16
main.py
View File

@ -1,16 +0,0 @@
from BitcoinPricePredictor import BitcoinPricePredictor
if __name__ == "__main__":
    # Build the predictor against the local historical-data database.
    # NOTE(review): the original comment described this as the "daily
    # (default)" configuration, yet the value passed is 'H' -- confirm which
    # timeframe codes BitcoinPricePredictor accepts and what they mean.
    # A weekly variant (timeframe='W') was left disabled in the original.
    predictor = BitcoinPricePredictor(
        db_path='bitcoin_historical_data.db',
        timeframe='H',
    )

    # Run the pipeline stages in their original order.
    predictor.load_and_prepare_data()
    predictor.train_model()
    predictor.evaluate_model()
    predictor.plot_history()

24
main_article_analyzer.py Normal file
View File

@ -0,0 +1,24 @@
import os
from article_analyzer import ArticleAnalyzer
def read_html_files(folder_path):
    """Collect the contents of every ``.html`` file under a folder.

    The folder is walked recursively with ``os.walk``. Only file names ending
    in lowercase ``.html`` are picked up, and each file is decoded as UTF-8.

    Args:
        folder_path: Directory to search.

    Returns:
        A dict mapping each file path (``os.path.join`` of the walked
        directory and the file name) to that file's full text.
    """

    def _slurp(path):
        # Read the whole file in one go; the context manager closes it.
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()

    # Lazily enumerate matching paths in the order os.walk yields them.
    html_paths = (
        os.path.join(directory, name)
        for directory, _subdirs, names in os.walk(folder_path)
        for name in names
        if name.endswith(".html")
    )
    return {path: _slurp(path) for path in html_paths}
if __name__ == "__main__":
    # Set up the classifier first, then load every article from ./data.
    analyzer = ArticleAnalyzer()
    articles_by_path = read_html_files("./data")
    print(f"Parsed {len(articles_by_path)} html files")

    # Classify each article in turn and print the raw result next to its path.
    for article_path in articles_by_path:
        classification = analyzer.classify_article(articles_by_path[article_path])
        print(f"article [{article_path}] - analyzed as [{classification}]\n")

8
main_price_predictor.py Normal file
View File

@ -0,0 +1,8 @@
from BitcoinPricePredictor import BitcoinPricePredictor
if __name__ == "__main__":
    # Price-prediction entry point: build the predictor on the local
    # historical-data database with timeframe 'H', then run each stage.
    # NOTE(review): 'H' presumably selects hourly data -- confirm against
    # BitcoinPricePredictor.
    price_predictor = BitcoinPricePredictor(
        db_path='bitcoin_historical_data.db',
        timeframe='H',
    )

    price_predictor.load_data()
    price_predictor.train_model()
    price_predictor.evaluate_model()
    price_predictor.plot_history()

6
main_trend_analysis.py Normal file
View File

@ -0,0 +1,6 @@
from bitcoin_trend_analysis import BitcoinTrendAnalysis
if __name__ == "__main__":
    # Trend-analysis entry point: load the historical data, then run the
    # trend/peak analysis with distance=1.
    # NOTE(review): the meaning of `distance` is defined by
    # BitcoinTrendAnalysis.analyze_trends_peaks -- confirm there.
    trend_analysis = BitcoinTrendAnalysis(db_path='bitcoin_historical_data.db')
    trend_analysis.load_data()
    trend_analysis.analyze_trends_peaks(distance=1)