123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115 |
- import pandas as pd
- import os, time, shutil
- import openai
- from fastapi import FastAPI, UploadFile, File, Form
- from pydantic import BaseModel
- from fastapi.responses import JSONResponse
- from datetime import datetime
- import uvicorn, socket
- from tqdm import tqdm
- from fastapi.staticfiles import StaticFiles
- from config import *
- from functions import split_dataframe_to_dict, extract_list_from_string
# ASGI application object. The contents of the local ./process directory are
# exposed as static files under the /data URL prefix.
app = FastAPI()
app.mount(
    "/data",
    StaticFiles(directory="./process"),
    name="static",
)
class ClassificationRequest(BaseModel):
    """JSON body accepted by the ``/classify/`` endpoint."""

    # Path of the Excel workbook to classify (loaded with pandas.read_excel).
    path: str
    # Caller identifier; also used as the name of the caller's working
    # directory under the configured base path.
    client_id: str
    # Column that, together with 'name', decides which merged rows count as
    # duplicates and are dropped from the final result.
    one_key: str
    # Workbook column whose values are sent to the model for classification.
    name_column: str
    # OpenAI API key used for this request.
    # SECURITY: a secret key used to be hard-coded here as the default, which
    # leaks it to anyone who can read the source or its history. That key must
    # be treated as compromised and rotated. The default is now taken from the
    # OPENAI_API_KEY environment variable (empty when unset), read once at
    # import time; callers may still pass their own key explicitly.
    api_key: str = os.getenv("OPENAI_API_KEY", "")
    # When True the configured proxy endpoint is used instead of the direct one.
    proxy: bool = False
    # Number of rows handed to split_dataframe_to_dict per chunk.
    chunk_size: int = 100
def _upload_target_is_safe(base_dir, target_path):
    """Return True when *target_path* resolves to a location inside *base_dir*."""
    base_real = os.path.realpath(base_dir)
    target_real = os.path.realpath(target_path)
    try:
        return os.path.commonpath([base_real, target_real]) == base_real
    except ValueError:
        # Raised when the two paths cannot be compared (e.g. different drives).
        return False


@app.post("/uploadfile/")
async def create_upload_file(file: UploadFile = File(...), client_id: str = Form(...)):
    """Save an uploaded file into the caller's directory under ``basic_path``.

    Args:
        file: The multipart upload.
        client_id: Form field naming the caller; the file is stored in
            ``{basic_path}/{client_id}``.

    Returns:
        JSONResponse with status 200 and the stored ``file_path`` on success,
        400 when the file name or client id would place the file outside
        ``basic_path``, and 500 when writing fails.
    """
    # Both the file name and client_id are supplied by the client, so neither
    # may be trusted as a path component. Keep only the final path segment of
    # the file name (browsers may send backslash-separated paths too).
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    user_directory = f'{basic_path}/{client_id}'
    file_location = os.path.join(user_directory, safe_name)

    if safe_name in ("", ".", "..") or not _upload_target_is_safe(basic_path, file_location):
        return JSONResponse(
            content={"message": "发生错误: 非法的文件名或 client_id"},
            status_code=400,
        )

    try:
        if not os.path.exists(user_directory):
            # exist_ok guards against a concurrent request creating it first.
            os.makedirs(user_directory, exist_ok=True)
            # NOTE(review): world-writable permissions are kept from the
            # original code; tighten if no other process depends on them.
            os.chmod(user_directory, 0o777)

        # Stream the upload to disk instead of reading it fully into memory.
        with open(file_location, "wb") as file_object:
            shutil.copyfileobj(file.file, file_object)
        os.chmod(file_location, 0o777)

        return JSONResponse(content={
            "message": f"文件 '{safe_name}' 上传成功",
            "client_id": client_id,
            "file_path": file_location
        }, status_code=200)
    except Exception as e:
        return JSONResponse(content={"message": f"发生错误: {str(e)}"}, status_code=500)
-
# Age limits (seconds since last modification) used by the housekeeping pass.
_STALE_FILE_SECONDS = 10 * 24 * 60 * 60
_STALE_DIR_SECONDS = 30 * 24 * 60 * 60


def _is_under_base(base_dir, candidate):
    """Return True when *candidate* resolves to a location inside *base_dir*."""
    base_real = os.path.realpath(base_dir)
    candidate_real = os.path.realpath(candidate)
    try:
        return os.path.commonpath([base_real, candidate_real]) == base_real
    except ValueError:
        # Raised when the two paths cannot be compared (e.g. different drives).
        return False


def _purge_stale_entries(base_dir, now, file_max_age, dir_max_age):
    """Delete files and directories under *base_dir* older than the given ages.

    Housekeeping is best-effort: a failure on one entry is reported and the
    remaining entries are still processed.
    """
    # Bottom-up walk so files are examined before their parent directories.
    for root, dir_names, file_names in os.walk(base_dir, topdown=False):
        for file_name in file_names:
            file_path = os.path.join(root, file_name)
            try:
                if now - os.path.getmtime(file_path) > file_max_age:
                    print(f"删除文件: {file_path}")
                    os.remove(file_path)
            except OSError as e:
                print(f"清理失败: {file_path}, 错误为:{e}")
        for dir_name in dir_names:
            dir_path = os.path.join(root, dir_name)
            try:
                if now - os.path.getmtime(dir_path) > dir_max_age:
                    print(f"删除文件夹: {dir_path}")
                    shutil.rmtree(dir_path)
            except OSError as e:
                print(f"清理失败: {dir_path}, 错误为:{e}")


def _classify_chunk(client, chunk):
    """Send one chunk to the chat model and return the parsed reply."""
    message = [
        {'role': 'system', 'content': cls_system_prompt},
        {'role': 'user', 'content': user_prompt.format(chunk=str(chunk))}
    ]
    response = client.chat.completions.create(model='gpt-4', messages=message)
    result_string = response.choices[0].message.content
    return extract_list_from_string(result_string)


@app.post("/classify/")
async def classify_data(request: ClassificationRequest):
    """Classify the names in an uploaded Excel workbook with the chat model.

    The workbook at ``request.path`` is loaded, the ``request.name_column``
    values are split into chunks of ``request.chunk_size`` and each chunk is
    sent to the model. Parsed results are appended to a temporary CSV in the
    caller's working directory, merged back onto the original rows by
    ``name`` and written next to the input as ``<name>_classify<ext>``.

    Args:
        request: Validated request body.

    Returns:
        A dict with ``message`` and, on success, ``output_file`` (download
        URL). Failures are reported through ``message`` rather than raised.
    """
    # NOTE(review): the OpenAI client and the pandas I/O below are synchronous
    # and run inside an ``async def`` handler — confirm this is acceptable for
    # the expected request concurrency.
    try:
        # request.path and client_id come from the client. The input must live
        # under basic_path: the output is written beside it and the download
        # URL is derived by splitting on basic_path, which previously failed
        # only at the very end (IndexError) for any other location.
        if basic_path not in request.path or not _is_under_base(basic_path, request.path):
            return {"message": "处理出现错误: 文件路径不在允许的目录内"}
        work_path = f'{basic_path}/{request.client_id}'
        if not _is_under_base(basic_path, work_path):
            return {"message": "处理出现错误: client_id 不合法"}

        _purge_stale_entries(
            basic_path, time.time(), _STALE_FILE_SECONDS, _STALE_DIR_SECONDS
        )

        os.makedirs(work_path, exist_ok=True)
        timestamp_str = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

        df_origin = pd.read_excel(request.path)
        df_origin['name'] = df_origin[request.name_column]
        df_origin['classify'] = ''
        df_use = df_origin[['name', 'classify']]
        deal_result = split_dataframe_to_dict(df_use, request.chunk_size)

        temp_csv = work_path + '/' + timestamp_str + 'output_temp.csv'
        # Output file: same directory and extension as the input, with a
        # '_classify' suffix on the base name.
        final_file_name, final_file_extension = os.path.splitext(os.path.basename(request.path))
        final_file = final_file_name + '_classify' + final_file_extension
        new_file_path = os.path.join(os.path.dirname(request.path), final_file)

        if not request.proxy:
            print(f'用户{request.client_id}正在使用直连的gpt-API')
            client = openai.OpenAI(api_key=request.api_key, base_url=openai_url)
        else:
            client = openai.OpenAI(api_key=request.api_key, base_url=proxy_url)

        for name, value in tqdm(deal_result.items(), desc='Processing', unit='item'):
            try:
                result = _classify_chunk(client, value)
                if not result:
                    continue
                df_output = pd.DataFrame(result)
                # Write the header only when the file is created; repeating it
                # on every append would make read_csv return the extra header
                # lines as data rows.
                # NOTE(review): assumes every chunk yields the same columns in
                # the same order — confirm against extract_list_from_string.
                df_output.to_csv(
                    temp_csv,
                    mode='a',
                    header=not os.path.exists(temp_csv),
                    index=False,
                )
            except Exception as e:
                # A failed chunk is reported and skipped so the others still run.
                print(f'{name}出现问题啦, 错误为:{e} 请自行调试')

        # The temporary CSV exists only if at least one chunk produced results.
        if not os.path.exists(temp_csv):
            return {"message": "文件没能处理成功"}

        df_result = pd.read_csv(temp_csv)
        # NOTE(review): df_origin already carries an empty 'classify' column;
        # if df_result has one too, pandas will suffix them (_x/_y) — confirm
        # this is the intended output layout.
        df_final = df_origin.merge(df_result, on='name', how='left').drop_duplicates(
            subset=[request.one_key, 'name'], keep='first'
        )
        df_final.to_excel(new_file_path)
        # maxsplit=1 keeps everything after the first occurrence of basic_path.
        return {"message": "分类完成", "output_file": file_base_url + new_file_path.split(basic_path, 1)[1]}
    except Exception as e:
        return {"message": f"处理出现错误: {e}"}
# Launch the server only when this file is executed directly, not on import.
if __name__ == "__main__":
    # Host 0.0.0.0 listens on every network interface; `port` is not defined
    # in this file (presumably supplied by the config star-import).
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=port,
    )
|