added storage to db
This commit is contained in:
parent
e1465539d2
commit
68e376ef32
@ -1,7 +1,18 @@
|
|||||||
import os
|
import os
|
||||||
|
from sqlalchemy import create_engine, Column, String, Float, MetaData, Table
|
||||||
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
from sqlalchemy.orm import sessionmaker
|
||||||
from article_analyzer import ArticleAnalyzer
|
from article_analyzer import ArticleAnalyzer
|
||||||
|
|
||||||
|
# Declarative base class: ORM models in this file subclass it, and their
# tables are created through Base.metadata.
Base = declarative_base()
|
||||||
|
|
||||||
|
class ArticleAnalysis(Base):
    """ORM model for one stored article classification.

    Rows live in the ``article_analyses`` table and are keyed by
    ``filename``.
    """

    __tablename__ = 'article_analyses'

    # Primary key: the name of the analyzed file.
    filename = Column('filename', String, primary_key=True)
    # Label reported for the article.
    label = Column('label', String)
    # Score reported alongside the label.
    score = Column('score', Float)
|
||||||
|
|
||||||
def read_html_files(folder_path):
|
def read_html_files(folder_path):
|
||||||
html_contents = {}
|
html_contents = {}
|
||||||
for root, _, files in os.walk(folder_path):
|
for root, _, files in os.walk(folder_path):
|
||||||
@ -16,9 +27,38 @@ def read_html_files(folder_path):
|
|||||||
if __name__ == "__main__":
    # Script entry point: classify every HTML file found under ./data and
    # store one row per file (filename, label, score) in a local SQLite
    # database, inserting new rows and updating rows that already exist.

    # Local import: only this entry point needs the exception type.
    from sqlalchemy.exc import SQLAlchemyError

    analyzer = ArticleAnalyzer()

    engine = create_engine('sqlite:///article_analysis.db')
    # Make sure the table exists before any session touches it.
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)

    html_files = read_html_files("./data")
    print(f"Parsed {len(html_files)} html files")

    # One session for the whole run, closed exactly once when the loop ends
    # (normally or through an exception).
    session = Session()
    try:
        for file, content in html_files.items():
            result = analyzer.classify_article_finbert(content)

            # Only the first entry of the classifier result is persisted.
            analysis = ArticleAnalysis(
                filename=os.path.basename(file),
                label=result[0]['label'],
                score=result[0]['score'],
            )

            try:
                # merge() looks the row up by primary key and inserts it or
                # updates the existing one, so re-running the script (or two
                # files sharing a base name) refreshes the stored values
                # instead of failing on a duplicate key.
                session.merge(analysis)
                session.commit()
            except SQLAlchemyError as exc:
                # Best effort: undo the failed transaction, report it, and
                # keep processing the remaining files.
                session.rollback()
                print(f"article [{file}] - could not store analysis: {exc}")

            print(f"article [{file}] - analyzed as [{result}]\n")
    finally:
        session.close()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user