chunked content to 512
This commit is contained in:
parent
3864d7e93c
commit
679f1bd941
@ -44,33 +44,41 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
Base.metadata.create_all(engine)
|
Base.metadata.create_all(engine)
|
||||||
|
|
||||||
result = analyzer.classify_article_finbert("Strong earning growth and expending market shares have positionned the company for long term success.")
|
# result = analyzer.classify_article_finbert("Strong earning growth and expending market shares have positionned the company for long term success.")
|
||||||
print(f'result {result}')
|
# print(f'result {result}')
|
||||||
|
|
||||||
|
for file, content in html_files.items():
|
||||||
# for file, content in html_files.items():
|
chunk_size = 512
|
||||||
# result = analyzer.classify_article_finbert(content)
|
chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)]
|
||||||
|
|
||||||
# filename = os.path.basename(file)
|
|
||||||
# print(f'result {result}')
|
|
||||||
|
|
||||||
# label = result[0]['label']
|
|
||||||
# score = result[0]['score']
|
|
||||||
|
|
||||||
# analysis = ArticleAnalysis(filename=filename, label=label, score=score)
|
|
||||||
|
|
||||||
# try:
|
results = []
|
||||||
# session.add(analysis)
|
for chunk in chunks:
|
||||||
# session.commit()
|
if chunk.strip():
|
||||||
# except:
|
chunk_result = analyzer.classify_article_finbert(chunk)
|
||||||
# session.rollback()
|
results.extend(chunk_result)
|
||||||
|
|
||||||
|
result = results if results else [{'label': 'neutral', 'score': 0.0}]
|
||||||
|
|
||||||
# existing = session.query(ArticleAnalysis).filter_by(filename=filename).first()
|
filename = os.path.basename(file)
|
||||||
# if existing:
|
print(f'result {result}')
|
||||||
# existing.label = label
|
|
||||||
# existing.score = score
|
|
||||||
# session.commit()
|
|
||||||
# finally:
|
|
||||||
# session.close()
|
|
||||||
|
|
||||||
# print(f"article [{file}] - analyzed as [{result}]\n")
|
# label = result[0]['label']
|
||||||
|
# score = result[0]['score']
|
||||||
|
|
||||||
|
# analysis = ArticleAnalysis(filename=filename, label=label, score=score)
|
||||||
|
|
||||||
|
# try:
|
||||||
|
# session.add(analysis)
|
||||||
|
# session.commit()
|
||||||
|
# except:
|
||||||
|
# session.rollback()
|
||||||
|
|
||||||
|
# existing = session.query(ArticleAnalysis).filter_by(filename=filename).first()
|
||||||
|
# if existing:
|
||||||
|
# existing.label = label
|
||||||
|
# existing.score = score
|
||||||
|
# session.commit()
|
||||||
|
# finally:
|
||||||
|
# session.close()
|
||||||
|
|
||||||
|
# print(f"article [{file}] - analyzed as [{result}]\n")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user