@@ -1,104 +1,33 @@
 import pandas as pd
-import math, json, os, time, shutil
-import openai, re, ast, requests
+import os, time, shutil
+import openai
 from fastapi import FastAPI, UploadFile, File, Form
 from pydantic import BaseModel
 from fastapi.responses import JSONResponse
 from datetime import datetime
-import uvicorn
+import uvicorn, socket
 from tqdm import tqdm
+from fastapi.staticfiles import StaticFiles
+from config import *
+from functions import split_dataframe_to_dict, extract_list_from_string
 
 app = FastAPI()
+app.mount("/data", StaticFiles(directory='./process'), name="static")
 
 class ClassificationRequest(BaseModel):
     path: str
     client_id: str
     one_key: str
     name_column: str
-    api_key: str
-    proxy: bool
-    chunk_size: int
-
-def save_dict_to_json(dictionary, filename):
-    with open(filename, 'w', encoding='utf-8') as file:
-        json.dump(dictionary, file, ensure_ascii=False, indent=4)
-
-def load_dict_from_json(filename):
-    with open(filename, 'r', encoding='utf-8') as file:
-        return json.load(file)
-
-def split_dataframe_to_dict(df, chunk_size=100):
-    # 计算需要切割的份数
-    num_chunks = math.ceil(len(df) / chunk_size)
-
-    # 用于存储结果的字典
-    result_dict = {}
-
-    for i in range(num_chunks):
-        # 切割 DataFrame
-        start = i * chunk_size
-        end = min((i + 1) * chunk_size, len(df))
-        chunk = df.iloc[start:end]
-
-        # 将切割后的 DataFrame 转换为字典并存储
-        result_dict[f'chunk_{i+1}'] = chunk.to_dict(orient='records')
-
-    return result_dict
-
-def extract_list_from_string(input_string):
-    # 使用正则表达式查找列表部分
-    list_pattern = r'\[.*?\]'
-    match = re.search(list_pattern, input_string, re.DOTALL)
-
-    if match:
-        list_string = match.group()
-        try:
-            # 使用 ast.literal_eval 安全地解析字符串
-            result = ast.literal_eval(list_string)
-
-            # 检查结果是否为列表
-            if isinstance(result, list):
-                return result
-            else:
-                print("解析结果不是列表")
-                return None
-        except Exception as e:
-            print(f"解析错误: {e}")
-            return None
-    else:
-        print("未找到列表结构")
-        return None
-
-def post_openai(messages):
-    Baseurl = "https://fast.bemore.lol"
-    Skey = "sk-dxl4rt2wWswbdrCr1c7b8500B68c43F5B6175b90F7D672C4"
-    payload = json.dumps({
-        "model": "gpt-4",
-        "messages": messages
-    })
-    url = Baseurl + "/v1/chat/completions"
-    headers = {
-        'Accept': 'application/json',
-        'Authorization': f'Bearer {Skey}',
-        'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
-        'Content-Type': 'application/json'
-    }
-
-    response = requests.request("POST", url, headers=headers, data=payload)
-
-    # 解析 JSON 数据为 Python 字典
-    print(response)
-    data = response.json()
-
-    # 获取 content 字段的值
-    content = data['choices'][0]['message']['content']
-
-    return content
+    api_key: str = "sk-iREtaVNjamaBArOTlc_2BfGFJVPiU-9EjSFMUspIPBT3BlbkFJxS0SMmKZD9L9UumPczee4VKawCwVeGBQAr9MgsWGkA"
+    proxy: bool = False
+    chunk_size: int = 100
 
 @app.post("/uploadfile/")
 async def create_upload_file(file: UploadFile = File(...), client_id: str = Form(...)):
-    user_directory = f'./process/{client_id}'
+    user_directory = f'{basic_path}/{client_id}'
     if not os.path.exists(user_directory):
         os.makedirs(user_directory)
         os.chmod(user_directory, 0o777) # 设置用户目录权限为777
-    print(user_directory)
-    print(file.filename)
     file_location = os.path.join(user_directory, file.filename)
-    print(file_location)
     try:
         with open(file_location, "wb+") as file_object:
             file_object.write(file.file.read())
@@ -117,7 +46,6 @@ async def classify_data(request: ClassificationRequest):
     current_time = time.time()
     TIME_THRESHOLD_FILEPATH = 30 * 24 * 60 * 60
     TIME_THRESHOLD_FILE = 10 * 24 * 60 * 60
-    basic_path = './process'
     for root, dirs, files in os.walk(basic_path, topdown=False):
         # 删除文件
         for file in files:
@@ -131,8 +59,7 @@ async def classify_data(request: ClassificationRequest):
             if current_time - os.path.getmtime(dir_path) > TIME_THRESHOLD_FILEPATH:
                 print(f"删除文件夹: {dir_path}")
                 shutil.rmtree(dir_path)
-    prompt = """提供的数据:{chunk}
-    返回的数据:"""
+
     work_path = f'{basic_path}/{request.client_id}'
     if not os.path.exists(work_path):
         os.makedirs(work_path, exist_ok=True)
@@ -154,14 +81,14 @@ async def classify_data(request: ClassificationRequest):
 
     if not request.proxy:
        print(f'用户{request.client_id}正在使用直连的gpt-API')
-        client = openai.OpenAI(api_key=request.api_key, base_url='https://api.openai.com/v1')
+        client = openai.OpenAI(api_key=request.api_key, base_url=openai_url)
     else:
-        client = openai.OpenAI(api_key=request.api_key, base_url='https://fast.bemore.lol/v1')
+        client = openai.OpenAI(api_key=request.api_key, base_url=proxy_url)
     for name, value in tqdm(deal_result.items(), desc='Processing', unit='item'):
         try:
             message = [
-                {'role':'system', 'content': '你是一个名字判断专家,你需要根据提供的列表中的每一个字典元素的会员姓名,判断其名字分类,分别为3类: 亚裔华人,亚裔非华人, 非亚裔,并将结果填充到会员分类中, 整合之后返回与提供数据一样的格式给我'},
-                {'role':'user', 'content':prompt.format(chunk=str(value))}
+                {'role':'system', 'content': cls_system_prompt},
+                {'role':'user', 'content':user_prompt.format(chunk=str(value))}
             ]
             # result_string = post_openai(message)
             response = client.chat.completions.create(model='gpt-4',messages=message)
@@ -178,11 +105,11 @@ async def classify_data(request: ClassificationRequest):
             df_result = pd.read_csv(temp_csv)
             df_final = df_origin.merge(df_result, on='name', how='left').drop_duplicates(subset=[request.one_key,'name'], keep='first')
             df_final.to_excel(new_file_path)
-            return {"message": "分类完成", "output_file": new_file_path}
+            return {"message": "分类完成", "output_file": file_base_url + new_file_path.split(basic_path)[1]}
         else:
             return {"message": "文件没能处理成功"}
     except Exception as e:
         return {"message": f"处理出现错误: {e}"}
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8070)
+    uvicorn.run(app, host="0.0.0.0", port=port)
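
Note: the new `functions` module pulled in by `from functions import split_dataframe_to_dict, extract_list_from_string` is not part of this diff. Judging from the helper bodies removed above, functions.py presumably carries those two helpers over as-is; the sketch below is reconstructed from the removed code (module name assumed from the import line, Chinese diagnostic prints of the original omitted).

# functions.py -- assumed contents, reconstructed from the helpers removed in this diff
import ast
import math
import re

def split_dataframe_to_dict(df, chunk_size=100):
    # Split the DataFrame into chunks of `chunk_size` rows,
    # keyed as 'chunk_1', 'chunk_2', ... with each chunk as a list of record dicts.
    num_chunks = math.ceil(len(df) / chunk_size)
    result_dict = {}
    for i in range(num_chunks):
        start = i * chunk_size
        end = min((i + 1) * chunk_size, len(df))
        chunk = df.iloc[start:end]
        result_dict[f'chunk_{i+1}'] = chunk.to_dict(orient='records')
    return result_dict

def extract_list_from_string(input_string):
    # Pull the first [...] block out of the model response and parse it
    # safely with ast.literal_eval; return None if nothing list-like is found.
    match = re.search(r'\[.*?\]', input_string, re.DOTALL)
    if not match:
        return None
    try:
        result = ast.literal_eval(match.group())
        return result if isinstance(result, list) else None
    except (ValueError, SyntaxError):
        return None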
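
Likewise, `from config import *` has to supply the names this file now relies on: basic_path, openai_url, proxy_url, cls_system_prompt, user_prompt, file_base_url and port. Most of those values can be read off the literals this diff deletes; file_base_url appears nowhere in the diff, so its value below is a placeholder, and the whole file is an assumed sketch rather than the actual config.py.

# config.py -- assumed contents; values taken from the literals removed in this diff,
# except file_base_url, which is not shown anywhere and is only a placeholder.
basic_path = './process'                      # was hard-coded in classify_data
openai_url = 'https://api.openai.com/v1'      # former direct base_url
proxy_url = 'https://fast.bemore.lol/v1'      # former proxy base_url
port = 8070                                   # former uvicorn port
file_base_url = 'http://localhost:8070/data'  # placeholder: public prefix for the mounted /data route
cls_system_prompt = '你是一个名字判断专家,你需要根据提供的列表中的每一个字典元素的会员姓名,判断其名字分类,分别为3类: 亚裔华人,亚裔非华人, 非亚裔,并将结果填充到会员分类中, 整合之后返回与提供数据一样的格式给我'
user_prompt = """提供的数据:{chunk}
    返回的数据:"""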