25 lines
752 B
Python
25 lines
752 B
Python
import os
|
|
|
|
from article_analyzer import ArticleAnalyzer
|
|
|
|
def read_html_files(folder_path):
    """Read every ``.html`` file under *folder_path* into memory.

    The directory tree is walked recursively. Each file whose name ends with
    ``.html`` (case-sensitive) is opened as UTF-8 text and read in full.

    Directories and file names are visited in sorted order so that the
    insertion order of the returned dict is the same on every run and every
    filesystem; ``os.walk`` itself makes no ordering guarantee.

    Args:
        folder_path: Root directory to search.

    Returns:
        A dict mapping each matching file's path (the walked directory joined
        with the file name) to the file's text content. Empty when nothing
        matches; a root that cannot be listed also yields an empty dict
        because ``os.walk`` ignores listing errors by default.

    Raises:
        UnicodeDecodeError: If a matching file is not valid UTF-8.
        OSError: If a matching file cannot be opened or read.
    """
    html_contents = {}
    for root, dirs, files in os.walk(folder_path):
        # Sorting in place steers the top-down walk into subdirectories in a
        # stable order.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(".html"):
                continue
            file_path = os.path.join(root, file)
            with open(file_path, "r", encoding="utf-8") as f:
                html_contents[file_path] = f.read()
    return html_contents
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run every HTML file under ./data through the
    # analyzer's classify_article_finbert method and print each result.
    # The analyzer is constructed before any file is read.
    finbert_analyzer = ArticleAnalyzer()

    pages = read_html_files("./data")
    page_count = len(pages)
    print(f"Parsed {page_count} html files")

    # Report one result per file, in the order read_html_files returned them.
    for path in pages:
        outcome = finbert_analyzer.classify_article_finbert(pages[path])
        print(f"article [{path}] - analyzed as [{outcome}]\n")
|