import os
import json
import shutil
import uuid

from fastapi import UploadFile, HTTPException

from config.settings import PDF_SAVE_DIR, JSON_CACHE_DIR
from parser.base_parser import get_file_hash
from parser.ventilation_plan_parser import VentilationPlanParser


def _remove_quietly(path: str) -> None:
    """Delete *path* if it exists, ignoring any OS-level failure."""
    try:
        os.remove(path)
    except OSError:
        pass


def parse_pdf(file: UploadFile) -> dict:
    """Store an uploaded PDF under its content hash and return its parsed data.

    The upload is first written to a uuid-named file in ``PDF_SAVE_DIR``,
    hashed with ``get_file_hash`` and then renamed to ``<hash>.pdf``. If
    ``JSON_CACHE_DIR`` already holds ``<hash>.json`` that cached result is
    returned; otherwise the PDF is parsed with ``VentilationPlanParser`` and
    the result is cached before being returned.

    :param file: uploaded file whose ``file`` attribute is a readable
        binary stream
    :return: the parsed (or cached) data
    """
    upload_path = os.path.join(PDF_SAVE_DIR, f"{uuid.uuid4()}.pdf")
    try:
        with open(upload_path, "wb") as out:
            # Stream in chunks instead of loading the whole upload in memory.
            shutil.copyfileobj(file.file, out)
        hash_id = get_file_hash(upload_path)
        pdf_path = rename_pdf_by_hash(upload_path, hash_id)
    except Exception:
        # Do not leave an orphaned uuid-named file behind on failure.
        _remove_quietly(upload_path)
        raise

    cache_path = os.path.join(JSON_CACHE_DIR, f"{hash_id}.json")
    if os.path.exists(cache_path):
        with open(cache_path, "r", encoding="utf-8") as cached:
            return json.load(cached)

    data = VentilationPlanParser().parse(pdf_path)

    # Write to a sibling temp file and swap it in atomically, so a failed or
    # interrupted dump can never leave a truncated cache entry that would
    # break every later request for this hash. The ".tmp" suffix keeps the
    # partial file from matching a ".json" suffix lookup.
    tmp_cache_path = f"{cache_path}.{uuid.uuid4().hex}.tmp"
    try:
        with open(tmp_cache_path, "w", encoding="utf-8") as out:
            json.dump(data, out, ensure_ascii=False, indent=2)
        os.replace(tmp_cache_path, cache_path)
    except Exception:
        _remove_quietly(tmp_cache_path)
        raise
    return data


def rename_pdf_by_hash(original_pdf_path: str, hash_id: str) -> str:
    """Rename a PDF file to ``<hash_id>.pdf`` inside ``PDF_SAVE_DIR``.

    :param original_pdf_path: path of the existing PDF file
    :param hash_id: unique hash identifier used as the new base name
    :return: full path of the renamed file
    """
    new_pdf_path = os.path.join(PDF_SAVE_DIR, f"{hash_id}.pdf")
    shutil.move(original_pdf_path, new_pdf_path)
    return new_pdf_path


def get_latest_file(dir_path: str, suffix: str) -> str:
    """Return the most recently modified file in a directory with a suffix.

    The suffix comparison is case-insensitive and only regular files
    directly inside ``dir_path`` are considered.

    :param dir_path: directory to search
    :param suffix: file suffix to match, e.g. ``.json``, ``.pdf``, ``.txt``
    :return: full path of the newest matching file
    :raises HTTPException: 404 if the directory does not exist or contains
        no file with the given suffix
    """
    if not os.path.isdir(dir_path):
        raise HTTPException(status_code=404, detail="文件夹不存在")

    wanted = suffix.lower()
    candidates = []
    for name in os.listdir(dir_path):
        if not name.lower().endswith(wanted):
            continue
        full_path = os.path.join(dir_path, name)
        if not os.path.isfile(full_path):
            continue
        try:
            mtime = os.path.getmtime(full_path)
        except OSError:
            # The file vanished between listing and stat; skip it.
            continue
        candidates.append((mtime, full_path))

    if not candidates:
        raise HTTPException(status_code=404, detail=f"目录下未找到 {suffix} 格式文件")

    # max() returns the first entry with the largest mtime, which matches the
    # original stable descending sort followed by taking element 0.
    return max(candidates, key=lambda item: item[0])[1]