diff --git a/main_article_analyzer.py b/main_article_analyzer.py index ed658a0..36c8f50 100644 --- a/main_article_analyzer.py +++ b/main_article_analyzer.py @@ -4,8 +4,8 @@ from sqlalchemy import create_engine, Column, String, Float, MetaData, Table from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker from article_analyzer import ArticleAnalyzer - -sys.path.append(os.path.join(os.path.dirname(__file__), 'finBERT', 'finbert')) +import nltk +import logging Base = declarative_base() @@ -28,6 +28,11 @@ def read_html_files(folder_path): if __name__ == "__main__": + # nltk.set_proxy('http://127.0.0.1:7890') + # nltk.download('punkt_tab') + + logging.basicConfig(level=logging.CRITICAL) + analyzer = ArticleAnalyzer() engine = create_engine('sqlite:///article_analysis.db') @@ -39,29 +44,33 @@ if __name__ == "__main__": Base.metadata.create_all(engine) - - for file, content in html_files.items(): - result = analyzer.classify_article_finbert(content) + result = analyzer.classify_article_finbert("Strong earning growth and expending market shares have positionned the company for long term success.") + print(f'result {result}') - filename = os.path.basename(file) + + # for file, content in html_files.items(): + # result = analyzer.classify_article_finbert(content) + + # filename = os.path.basename(file) + # print(f'result {result}') + + # label = result[0]['label'] + # score = result[0]['score'] + + # analysis = ArticleAnalysis(filename=filename, label=label, score=score) - label = result[0]['label'] - score = result[0]['score'] + # try: + # session.add(analysis) + # session.commit() + # except: + # session.rollback() - analysis = ArticleAnalysis(filename=filename, label=label, score=score) - - try: - session.add(analysis) - session.commit() - except: - session.rollback() + # existing = session.query(ArticleAnalysis).filter_by(filename=filename).first() + # if existing: + # existing.label = label + # existing.score = score + # session.commit() + # finally: + # session.close() - existing = session.query(ArticleAnalysis).filter_by(filename=filename).first() - if existing: - existing.label = label - existing.score = score - session.commit() - finally: - session.close() - - print(f"article [{file}] - analyzed as [{result}]\n") + # print(f"article [{file}] - analyzed as [{result}]\n")