Chunk article content into 512-character pieces before FinBERT classification

This commit is contained in:
Simon Moisy 2025-03-22 08:00:58 +08:00
parent 3864d7e93c
commit 679f1bd941

View File

@@ -44,33 +44,41 @@ if __name__ == "__main__":
Base.metadata.create_all(engine) Base.metadata.create_all(engine)
result = analyzer.classify_article_finbert("Strong earning growth and expending market shares have positionned the company for long term success.") # result = analyzer.classify_article_finbert("Strong earning growth and expending market shares have positionned the company for long term success.")
print(f'result {result}') # print(f'result {result}')
for file, content in html_files.items():
# for file, content in html_files.items(): chunk_size = 512
# result = analyzer.classify_article_finbert(content) chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)]
# filename = os.path.basename(file)
# print(f'result {result}')
# label = result[0]['label']
# score = result[0]['score']
# analysis = ArticleAnalysis(filename=filename, label=label, score=score)
# try: results = []
# session.add(analysis) for chunk in chunks:
# session.commit() if chunk.strip():
# except: chunk_result = analyzer.classify_article_finbert(chunk)
# session.rollback() results.extend(chunk_result)
result = results if results else [{'label': 'neutral', 'score': 0.0}]
# existing = session.query(ArticleAnalysis).filter_by(filename=filename).first() filename = os.path.basename(file)
# if existing: print(f'result {result}')
# existing.label = label
# existing.score = score
# session.commit()
# finally:
# session.close()
# print(f"article [{file}] - analyzed as [{result}]\n") # label = result[0]['label']
# score = result[0]['score']
# analysis = ArticleAnalysis(filename=filename, label=label, score=score)
# try:
# session.add(analysis)
# session.commit()
# except:
# session.rollback()
# existing = session.query(ArticleAnalysis).filter_by(filename=filename).first()
# if existing:
# existing.label = label
# existing.score = score
# session.commit()
# finally:
# session.close()
# print(f"article [{file}] - analyzed as [{result}]\n")