added storage to db

This commit is contained in:
Simon Moisy 2025-03-21 17:39:30 +08:00
parent e1465539d2
commit 68e376ef32

View File

@ -1,7 +1,18 @@
import os import os
from sqlalchemy import create_engine, Column, String, Float, MetaData, Table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from article_analyzer import ArticleAnalyzer from article_analyzer import ArticleAnalyzer
# Declarative base class that the ORM model below inherits from; its
# metadata is what the script entry point passes to create_all().
Base = declarative_base()


class ArticleAnalysis(Base):
    """Stored classification result for a single article file.

    One row per article, keyed by the article's file name. The script
    entry point fills ``label`` and ``score`` from the first entry of the
    result returned by ``ArticleAnalyzer.classify_article_finbert``.
    """

    __tablename__ = 'article_analyses'

    # Base name of the analysed file; primary key, so one row per name.
    filename = Column(String, primary_key=True)

    # Label string reported by the classifier for this article.
    label = Column(String)

    # Score reported by the classifier alongside the label.
    score = Column(Float)
def read_html_files(folder_path): def read_html_files(folder_path):
html_contents = {} html_contents = {}
for root, _, files in os.walk(folder_path): for root, _, files in os.walk(folder_path):
@ -16,9 +27,38 @@ def read_html_files(folder_path):
if __name__ == "__main__":
    # Script entry point: classify every HTML file found under ./data and
    # store (or refresh) one ArticleAnalysis row per file in a local SQLite
    # database.

    # Imported locally because only this entry point needs the exception type.
    from sqlalchemy.exc import SQLAlchemyError

    analyzer = ArticleAnalyzer()

    # SQLite file created relative to the current working directory.
    engine = create_engine('sqlite:///article_analysis.db')
    Session = sessionmaker(bind=engine)
    session = Session()

    html_files = read_html_files("./data")
    print(f"Parsed {len(html_files)} html files")

    # Create the article_analyses table if it does not exist yet.
    Base.metadata.create_all(engine)

    try:
        for file, content in html_files.items():
            result = analyzer.classify_article_finbert(content)

            # Only the first entry of the classifier result is persisted.
            # NOTE(review): the key is the base name only, so two files with
            # the same name in different sub-folders would share one row --
            # confirm this is intended.
            filename = os.path.basename(file)
            label = result[0]['label']
            score = result[0]['score']
            analysis = ArticleAnalysis(filename=filename, label=label, score=score)

            try:
                # merge() looks the row up by primary key and inserts or
                # updates accordingly, so re-running the script refreshes
                # existing rows without relying on a failed INSERT.
                session.merge(analysis)
                session.commit()
            except SQLAlchemyError as exc:
                # Keep processing the remaining files, but do not hide the
                # failure; rollback leaves the session usable for the next one.
                session.rollback()
                print(f"could not store analysis for [{file}]: {exc}")

            print(f"article [{file}] - analyzed as [{result}]\n")
    finally:
        # Release the session once, after all files have been handled.
        session.close()