# mia.liu / name_classify
import pandas as pd
import os, time, shutil
import openai
from fastapi import FastAPI, UploadFile, File, Form
from pydantic import BaseModel
from fastapi.responses import JSONResponse
from datetime import datetime
import uvicorn, socket
from tqdm import tqdm
from fastapi.staticfiles import StaticFiles
from config import *
from functions import split_dataframe_to_dict, extract_list_from_string
# Application instance that the route decorators below register against.
app = FastAPI()
# Expose the local ./process directory as static files under the /data URL prefix.
app.mount("/data", StaticFiles(directory='./process'), name="static")
class ClassificationRequest(BaseModel):
    """Request body accepted by the ``/classify/`` endpoint.

    SECURITY: an OpenAI secret key used to be hard-coded here as the default
    for ``api_key``. Secrets must not live in source control, so the default
    is now taken from the ``OPENAI_API_KEY`` environment variable (read once,
    at import time). The previously committed key should be treated as leaked
    and revoked. Callers may still pass ``api_key`` explicitly, exactly as
    before.
    """

    # Path of the Excel workbook to classify (opened with pandas.read_excel).
    path: str
    # Caller identifier; used as the per-client sub-directory name.
    client_id: str
    # Column used together with 'name' to de-duplicate the merged result rows.
    one_key: str
    # Column of the workbook that holds the names to classify.
    name_column: str
    # OpenAI API key; falls back to the OPENAI_API_KEY environment variable
    # (empty string when unset, which makes the upstream API reject the call).
    api_key: str = os.environ.get("OPENAI_API_KEY", "")
    # When true the proxy base URL is used instead of the direct OpenAI one.
    proxy: bool = False
    # Passed to split_dataframe_to_dict; presumably the number of rows sent
    # to the model per request -- confirm against that helper.
    chunk_size: int = 100

@app.post("/uploadfile/")
async def create_upload_file(file: UploadFile = File(...), client_id: str = Form(...)):
    """Store an uploaded file in the caller's directory under ``basic_path``.

    Args:
        file: The multipart upload to persist.
        client_id: Caller identifier; becomes the sub-directory name. It must
            be a single path component (no separators, not ``.`` or ``..``).

    Returns:
        A ``JSONResponse``: status 200 with the stored file path on success,
        400 when ``client_id`` or the file name is unusable, and 500 with the
        error text when writing to disk fails.
    """
    # Both values come straight from the HTTP request. Reject or neutralise
    # anything that could escape basic_path once joined into a filesystem path.
    if client_id in ("", ".", "..") or os.path.basename(client_id) != client_id:
        return JSONResponse(content={"message": "非法的 client_id"}, status_code=400)
    # Keep only the final path component of the client-supplied file name.
    safe_name = os.path.basename(file.filename or "")
    if safe_name in ("", ".", ".."):
        return JSONResponse(content={"message": "非法的文件名"}, status_code=400)

    user_directory = f'{basic_path}/{client_id}'
    file_location = os.path.join(user_directory, safe_name)
    try:
        if not os.path.exists(user_directory):
            # exist_ok guards against a concurrent request creating it first.
            os.makedirs(user_directory, exist_ok=True)
            # Directory permissions are set to 777 only when it is created.
            # NOTE(review): world-writable is kept from the original code;
            # tighten it if no other account needs write access.
            os.chmod(user_directory, 0o777)
        with open(file_location, "wb") as file_object:
            # Stream in chunks instead of loading the whole upload into memory.
            shutil.copyfileobj(file.file, file_object)
        os.chmod(file_location, 0o777)  # File permissions are set to 777.
        return JSONResponse(content={
            "message": f"文件 '{safe_name}' 上传成功",
            "client_id": client_id,
            "file_path": file_location
        }, status_code=200)
    except Exception as e:
        return JSONResponse(content={"message": f"发生错误: {str(e)}"}, status_code=500)
    
def _purge_expired_files(root_dir, now, file_ttl=10 * 24 * 60 * 60, dir_ttl=30 * 24 * 60 * 60):
    """Best-effort removal of stale files and directories below *root_dir*.

    Files whose modification time is older than *file_ttl* seconds and
    directories older than *dir_ttl* seconds (both relative to *now*) are
    deleted. A failure on one entry is logged and skipped so that
    housekeeping can never abort the request that triggered it.
    """
    # Bottom-up walk: the contents of a directory are visited before it is.
    for parent, dir_names, file_names in os.walk(root_dir, topdown=False):
        # Remove expired files.
        for file_name in file_names:
            file_path = os.path.join(parent, file_name)
            try:
                if now - os.path.getmtime(file_path) > file_ttl:
                    print(f"删除文件: {file_path}")
                    os.remove(file_path)
            except OSError as e:
                print(f"清理文件失败: {file_path}, 错误为: {e}")
        # Remove expired directories.
        for dir_name in dir_names:
            dir_path = os.path.join(parent, dir_name)
            try:
                if now - os.path.getmtime(dir_path) > dir_ttl:
                    print(f"删除文件夹: {dir_path}")
                    shutil.rmtree(dir_path)
            except OSError as e:
                print(f"清理文件夹失败: {dir_path}, 错误为: {e}")


@app.post("/classify/")
async def classify_data(request: ClassificationRequest):
    """Classify the names in an Excel workbook with a chat-completion model.

    Steps:
      1. Purge files older than 10 days and directories older than 30 days
         under ``basic_path`` (best effort).
      2. Read ``request.path``, copy ``request.name_column`` into a ``name``
         column and split the rows into chunks of ``request.chunk_size``.
      3. Send every chunk to the model and append each parsed result to a
         per-request temporary CSV inside the client's work directory.
      4. Left-merge the collected results onto the original rows by ``name``,
         de-duplicate on ``[request.one_key, 'name']`` and write
         ``<original name>_classify<ext>`` next to the input file.

    Returns:
        A dict with ``message`` and, on success, ``output_file`` (download
        URL). Every failure is reported through ``message``; nothing is raised.

    NOTE(review): the pandas and OpenAI calls are synchronous, so this
    coroutine blocks the event loop for the duration of the request.
    """
    try:
        _purge_expired_files(basic_path, time.time())

        work_path = f'{basic_path}/{request.client_id}'
        os.makedirs(work_path, exist_ok=True)
        # Timestamp string that makes the temporary CSV name unique per request.
        timestamp_str = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        df_origin = pd.read_excel(request.path)
        df_origin['name'] = df_origin[request.name_column]
        df_origin['classify'] = ''
        df_use = df_origin[['name', 'classify']]
        deal_result = split_dataframe_to_dict(df_use, request.chunk_size)

        temp_csv = work_path + '/' + timestamp_str + 'output_temp.csv'
        final_file_name, final_file_extension = os.path.splitext(os.path.basename(request.path))
        # Append the suffix to the original file name.
        final_file = final_file_name + '_classify' + final_file_extension

        # The output is written next to the input file.
        new_file_path = os.path.join(os.path.dirname(request.path), final_file)

        if not request.proxy:
            print(f'用户{request.client_id}正在使用直连的gpt-API')
            client = openai.OpenAI(api_key=request.api_key, base_url=openai_url)
        else:
            client = openai.OpenAI(api_key=request.api_key, base_url=proxy_url)
        for name, value in tqdm(deal_result.items(), desc='Processing', unit='item'):
            # A failing chunk is logged and skipped so the rest still runs.
            try:
                message = [
                    {'role':'system', 'content': cls_system_prompt},
                    {'role':'user', 'content':user_prompt.format(chunk=str(value))}
                ]
                response = client.chat.completions.create(model='gpt-4',messages=message)
                result_string = response.choices[0].message.content
                result = extract_list_from_string(result_string)
                if result:
                    df_output = pd.DataFrame(result)
                    # Write the header only when the file is first created.
                    # Repeating it on every append put header lines into the
                    # data and forced every column to be read back as text.
                    df_output.to_csv(
                        temp_csv,
                        mode='a',
                        header=not os.path.exists(temp_csv),
                        index=False,
                    )
            except Exception as e:
                print(f'{name}出现问题啦, 错误为:{e} 请自行调试')

        if not os.path.exists(temp_csv):
            # No chunk produced a usable result.
            return {"message": "文件没能处理成功"}

        df_result = pd.read_csv(temp_csv)
        # NOTE(review): df_origin already has an empty 'classify' column; if
        # the model output also carries 'classify', the merge yields
        # 'classify_x' / 'classify_y' -- confirm that this is intended.
        df_final = df_origin.merge(df_result, on='name', how='left').drop_duplicates(subset=[request.one_key,'name'], keep='first')
        df_final.to_excel(new_file_path)
        # NOTE(review): this raises IndexError (reported through the handler
        # below) when request.path does not contain basic_path.
        return {"message": "分类完成", "output_file": file_base_url + new_file_path.split(basic_path)[1]}
    except Exception as e:
        return {"message": f"处理出现错误: {e}"}

# Start the ASGI server only when this file is executed directly as a script.
if __name__ == "__main__":
    # Listens on all network interfaces. `port` is not defined in this file;
    # presumably it is supplied by the star-import from config -- confirm.
    uvicorn.run(app, host="0.0.0.0", port=port)