| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566 |
- import os
- import json
- import shutil
- import uuid
- from fastapi import UploadFile, HTTPException
- from config.settings import PDF_SAVE_DIR, JSON_CACHE_DIR
- from parser.base_parser import get_file_hash
- from parser.ventilation_plan_parser import VentilationPlanParser
def parse_pdf(file: UploadFile) -> dict:
    """Store an uploaded PDF, parse it, and cache the result by content hash.

    The upload is first written under a random name, hashed with
    ``get_file_hash``, and then renamed to ``<hash>.pdf`` so that identical
    uploads share one stored file and one cached JSON result.

    :param file: uploaded PDF received from the FastAPI request
    :return: parsed data; read from the JSON cache when a valid entry exists,
        otherwise produced by ``VentilationPlanParser`` and then cached
    """
    # Make sure both target directories exist so a fresh deployment does not
    # fail with FileNotFoundError on the first upload.
    os.makedirs(PDF_SAVE_DIR, exist_ok=True)
    os.makedirs(JSON_CACHE_DIR, exist_ok=True)

    upload_path = os.path.join(PDF_SAVE_DIR, f"{uuid.uuid4()}.pdf")
    try:
        with open(upload_path, "wb") as out:
            # Stream in chunks instead of loading the whole upload into memory.
            shutil.copyfileobj(file.file, out)
        hash_id = get_file_hash(upload_path)
        new_pdf_path = rename_pdf_by_hash(upload_path, hash_id)
    except Exception:
        # Do not leave an orphaned random-named upload behind on failure.
        if os.path.exists(upload_path):
            os.remove(upload_path)
        raise

    cache_path = os.path.join(JSON_CACHE_DIR, f"{hash_id}.json")
    try:
        with open(cache_path, "r", encoding="utf-8") as cached:
            return json.load(cached)
    except (FileNotFoundError, json.JSONDecodeError):
        # No cache entry yet, or a truncated/corrupt one: parse again below
        # and overwrite it.
        pass

    data = VentilationPlanParser().parse(new_pdf_path)

    # Write to a sibling temp file and swap it in with os.replace so readers
    # never observe a half-written cache entry. The ".tmp" ending also keeps
    # the temp file from matching a ".json" suffix filter.
    tmp_cache_path = f"{cache_path}.{uuid.uuid4().hex}.tmp"
    try:
        with open(tmp_cache_path, "w", encoding="utf-8") as out:
            json.dump(data, out, ensure_ascii=False, indent=2)
        os.replace(tmp_cache_path, cache_path)
    except Exception:
        if os.path.exists(tmp_cache_path):
            os.remove(tmp_cache_path)
        raise
    return data
def rename_pdf_by_hash(original_pdf_path: str, hash_id: str) -> str:
    """Move a PDF to ``<hash_id>.pdf`` inside ``PDF_SAVE_DIR``.

    :param original_pdf_path: current path of the PDF file
    :param hash_id: hash identifier used as the new base file name
    :return: full path of the file after the move
    """
    # Destination is always <PDF_SAVE_DIR>/<hash_id>.pdf.
    destination = os.path.join(PDF_SAVE_DIR, "{}.pdf".format(hash_id))
    shutil.move(original_pdf_path, destination)
    return destination
def get_latest_file(dir_path: str, suffix: str) -> str:
    """Return the most recently modified file with the given suffix.

    Only regular files directly inside ``dir_path`` are considered, and the
    suffix comparison is case-insensitive.

    :param dir_path: directory to search
    :param suffix: file suffix to match, e.g. .json, .pdf, .txt
    :return: full path of the newest matching file
    :raises HTTPException: 404 when the directory does not exist or holds no
        file with the requested suffix
    """
    # Guard: the directory must exist.
    if not os.path.isdir(dir_path):
        raise HTTPException(status_code=404, detail="文件夹不存在")

    # Collect (mtime, path) pairs for every matching regular file.
    stamped = []
    for entry_name in os.listdir(dir_path):
        candidate = os.path.join(dir_path, entry_name)
        if not os.path.isfile(candidate):
            continue
        if not entry_name.lower().endswith(suffix.lower()):
            continue
        stamped.append((os.path.getmtime(candidate), candidate))

    if not stamped:
        raise HTTPException(status_code=404, detail=f"目录下未找到 {suffix} 格式文件")

    # The pair with the largest modification time wins; on ties the first
    # one in listing order is kept.
    _, newest_path = max(stamped, key=lambda pair: pair[0])
    return newest_path
|