"""Article classification helpers built on FinBERT and an Ollama-served LLM."""

from enum import Enum

import markdownify
import ollama
import torch
from pydantic import BaseModel
from transformers import AutoModelForSequenceClassification

from finBERT.finbert.finbert import predict


class Category(str, Enum):
    """Topic labels offered to the LLM when classifying an article."""

    REGULATORY_NEWS = "Regulatory News"
    INSTITUTIONAL_ADOPTION = "Institutional Adoption"
    MARKET_SENTIMENT = "Market Sentiment"
    MACROECONOMIC_FACTORS = "Macroeconomic Factors"
    SECURITY_HACKS = "Security & Hacks"
    TECHNOLOGICAL_DEVELOPMENTS = "Technological Developments"
    WHALE_EXCHANGE_ACTIVITY = "Whale & Exchange Activity"


class Sentiment(str, Enum):
    """Sentiment labels offered to the LLM when classifying an article."""

    POSITIVE = "Positive"
    NEUTRAL = "Neutral"
    NEGATIVE = "Negative"


class ArticleClassification(BaseModel):
    """Structured-output model whose JSON schema is sent to the LLM."""

    category: Category
    sentiment: Sentiment


class ArticleAnalyzer:
    """Classify articles with a FinBERT model and with an Ollama chat model."""

    def __init__(self):
        """Load the FinBERT weights and build the base LLM prompt.

        The model is moved to CUDA when it is available and stays on the CPU
        otherwise, so constructing the analyzer no longer fails on hosts
        without a GPU. The chosen device is exposed as ``self.device``.
        """
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "ProsusAI/finbert",
            num_labels=3,
            cache_dir=None,
        )
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)

        # The bullet list is derived from ``Category`` so the prompt and the
        # structured-output schema cannot drift apart. The resulting text is
        # identical to the previously hard-coded prompt.
        category_bullets = "".join(f" - {category.value}" for category in Category)
        self.base_prompt = (
            " Classify the following article into one of these categories:"
            f"{category_bullets}"
            " Also, assign a sentiment (Positive, Neutral or Negative) "
        )

        print(f"This JSON model is going to be used for structured output classification : {ArticleClassification.model_json_schema()}")

    @staticmethod
    def convert_to_markdown(html_content: str) -> str:
        """Convert HTML content to Markdown format.

        Args:
            html_content: Cleaned HTML content.

        Returns:
            Markdown content.
        """
        # NOTE(review): ``strip_tags`` and ``code_block`` do not appear among
        # markdownify's documented options and are presumably ignored; they
        # are kept so behavior is unchanged -- TODO confirm and remove.
        return markdownify.markdownify(
            html_content,
            strip=["script", "style", "img", "svg"],
            strip_tags=True,
            heading_style="atx",
            code_block=True,
        )

    def classify_article_llm(self, article_text: str) -> str:
        """Ask the ``llama3.2`` Ollama model to classify an article.

        The JSON schema of ``ArticleClassification`` is passed as the
        ``format`` argument of the chat call.

        Args:
            article_text: Article text inserted into the prompt.

        Returns:
            The ``content`` of the response message, returned as-is.
            Presumably a JSON document following the schema; it is not
            validated here, so callers should parse it themselves.
        """
        prompt = (
            f"{self.base_prompt} ARTICLE: {article_text}"
            " OUTPUT FORMAT: Category: Sentiment: "
        )
        response = ollama.chat(
            model="llama3.2",
            messages=[{"role": "user", "content": prompt}],
            format=ArticleClassification.model_json_schema(),
        )
        return response["message"]["content"]

    def classify_article_finbert(self, article_html: str):
        """Run FinBERT's ``predict`` on an article given as HTML.

        The HTML is first converted to Markdown with ``convert_to_markdown``.

        Args:
            article_html: Article content as HTML.

        Returns:
            Whatever ``predict`` returns for the converted text.
        """
        article_md = self.convert_to_markdown(article_html)
        # Request the GPU only when the model was actually placed on it.
        return predict(
            article_md,
            model=self.model,
            use_gpu=self.device == "cuda",
        )