Explorar el Código

'api的功能增加'

wangdalin hace 10 meses
padre
commit
00b3d25e99
Se han modificado 7 ficheros con 273 adiciones y 160 borrados
  1. 2 0
      .gitignore
  2. 48 0
      client.py
  3. 0 156
      name_classify.py
  4. 171 0
      name_classify_api.py
  5. 7 4
      requirements.txt
  6. 13 0
      tools/merge_df.py
  7. 32 0
      tools/test_openai_key.py

+ 2 - 0
.gitignore

@@ -1 +1,3 @@
 data_before
+data_out
+process

+ 48 - 0
client.py

@@ -0,0 +1,48 @@
import os

import requests

# Request payload for the name-classification service's /classify/ endpoint.
# SECURITY FIX: an OpenAI API key used to be hard-coded here and was committed
# to version control — that key must be treated as leaked and revoked. The key
# is now read from the environment instead.
data = {
    # Forward slashes work on both Windows and POSIX; the original mixed
    # "/" and "\\" separators in one path.
    "path": "./process/wangdalin/人群分类-自建站2024.08.xlsx",
    "client_id": "wangdalin",
    "one_key": "网店单号",
    "name_column": "收货姓名",
    "api_key": os.environ.get("OPENAI_API_KEY", ""),
    "proxy": False,  # True routes the server through its proxy base URL
    "chunk_size": 100,
}

# POST the classification request to the FastAPI server.
response = requests.post("http://10.41.3.69:8070/classify/", json=data)

# Report the outcome.
if response.status_code == 200:
    result = response.json()
    print("接口响应:", result)
else:
    print(f"请求失败,状态码: {response.status_code}")
    print("错误信息:", response.text)
+
+
+################################
+# # 文件路径
+# file_path = 'E:/code/name_classify/data_before/人群分类-自建站2024.08.xlsx'
+
+# # 表单数据
+# data = {
+#     'client_id': 'wangdalin'
+# }
+
+# # 发送 POST 请求
+# with open(file_path, 'rb') as file:
+#     files = {
+#         'file': ('人群分类-自建站2024.08.xlsx', file, 'text/plain')  # 'text/plain' 是文件类型,可以根据需要修改
+#     }
+#     response = requests.post("http://10.41.3.69:8070/uploadfile/", data=data, files=files)
+
+# # 处理响应
+# if response.status_code == 200:
+#     result = response.json()
+#     print("接口响应:", result)
+# else:
+#     print(f"请求失败,状态码: {response.status_code}")
+#     print("错误信息:", response.text)

+ 0 - 156
name_classify.py

@@ -1,156 +0,0 @@
-import pandas as pd 
-import math, json
-import openai, re, ast, requests
-# client = openai.OpenAI(api_key='sk-VG24X609Gy3L7U4Q995d82Ad95A742B4A3E873404c1067D1', base_url='https://fast.bemore.lol/v1')
-client = openai.OpenAI(api_key='sk-proj-d9t4UfFWe65DE9KHalwTT3BlbkFJTj0H36OsRBnu9Te9KOL7', base_url='https://api.openai.com/v1')
-# client = openai.OpenAI(api_key='sk-proj-LA7DMHievtKRMZfQwZ1OT3BlbkFJmLMamYoZ5yWynUHOYtZ2', base_url='https://api.openai.com/v1')
-#xiaohua
-# client = openai.OpenAI(api_key='sk-7VeOfQrKWINHGse3XErMp2C3CNLWEstyETei7b1gTzT3BlbkFJ6iBHq4UM6xt2B3Z6h7AFr7RSpTe0wT9d_6ETBJqC0A', base_url='https://api.openai.com/v1')
-
-# client = openai.OpenAI(api_key='sk-proj-LA7DMHievtKRMZfQwZ1OT3BlbkFJmLMamYoZ5yWynUHOYtZ2', base_url='https://api.openai.com/v1')
-prompt = """提供的数据:{chunk}
-返回的数据:"""
-proxy = False
-next_iter = False
-# message = [
-#         {'role':'system', 'content': '你是一个名字判断专家,你需要根据提供的列表中的每一个字典元素的会员姓名,判断其是否为华裔的名字,结果为 华裔 或者为 非华裔,并将结果填充到会员分类中, 整合之后返回与提供数据一样的格式给我'},
-#         {'role':'user', 'content':'你好,你是谁'}
-#     ]
-# if not proxy:
-#     response = client.chat.completions.create(model='gpt-4',messages=message)
-#     result_string = response.choices[0].message.content
-# else:
-#     response = client.chat.completions.create(model='gpt-4',messages=message)
-#     result_string = response.choices[0].message.content
-
-# print(result_string)
-def save_dict_to_json(dictionary, filename):
-    with open(filename, 'w', encoding='utf-8') as file:
-        json.dump(dictionary, file, ensure_ascii=False, indent=4)
-
-def load_dict_from_json(filename):
-    with open(filename, 'r', encoding='utf-8') as file:
-        return json.load(file)
-def split_dataframe_to_dict(df, chunk_size=100):
-    # 计算需要切割的份数
-    num_chunks = math.ceil(len(df) / chunk_size)
-    
-    # 用于存储结果的字典
-    result_dict = {}
-    
-    for i in range(num_chunks):
-        # 切割 DataFrame
-        start = i * chunk_size
-        end = min((i + 1) * chunk_size, len(df))
-        chunk = df.iloc[start:end]
-        
-        # 将切割后的 DataFrame 转换为字典并存储
-        result_dict[f'chunk_{i+1}'] = chunk.to_dict(orient='records')
-    
-    return result_dict
-def extract_list_from_string(input_string):
-    # 使用正则表达式查找列表部分
-    list_pattern = r'\[.*?\]'
-    match = re.search(list_pattern, input_string, re.DOTALL)
-    
-    if match:
-        list_string = match.group()
-        try:
-            # 使用 ast.literal_eval 安全地解析字符串
-            result = ast.literal_eval(list_string)
-            
-            # 检查结果是否为列表
-            if isinstance(result, list):
-                return result
-            else:
-                print("解析结果不是列表")
-                return None
-        except Exception as e:
-            print(f"解析错误: {e}")
-            return None
-    else:
-        print("未找到列表结构")
-        return None
-def post_openai(messages):
-   Baseurl = "https://fast.bemore.lol"
-   Skey = "sk-dxl4rt2wWswbdrCr1c7b8500B68c43F5B6175b90F7D672C4"
-   payload = json.dumps({
-      "model": "gpt-4",
-      "messages": messages
-   })
-   url = Baseurl + "/v1/chat/completions"
-   headers = {
-      'Accept': 'application/json',
-      'Authorization': f'Bearer {Skey}',
-      'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
-      'Content-Type': 'application/json'
-   }
-
-   response = requests.request("POST", url, headers=headers, data=payload)
-
-   # 解析 JSON 数据为 Python 字典
-   print(response)
-   data = response.json()
-
-   # 获取 content 字段的值
-   content = data['choices'][0]['message']['content']
-
-   return content
-
-path = ''
-df = pd.read_excel(path)
-
-if not next_iter:
-    df = pd.read_excel('会员列表.xlsx')
-    df['会员分类'] = ''
-    deal_result = split_dataframe_to_dict(df)
-    save_dict_to_json(deal_result,'save.json')
-    df_all = pd.DataFrame(columns=['会员姓名', '会员分类'])
-    df_all.to_csv('会员列表.csv',mode='a', header=True, index=False)
-else:
-    deal_result = load_dict_from_json('save.json')
-    keys_to_remove = []
-    # keys_to_remove = [f'chunk_{i+1}' for i in range(43)]
-    # keys_to_remove.remove('chunk_17')
-    deal_result = {k: v for k, v in deal_result.items() if k not in keys_to_remove}
-    save_dict_to_json(deal_result,'save_new.json')
-    print(deal_result.keys())
-for name, value in deal_result.items():
-    print(name)
-    message = [
-        {'role':'system', 'content': '你是一个名字判断专家,你需要根据提供的列表中的每一个字典元素的会员姓名,判断其名字分类,分别为3类: 亚裔华人,亚裔非华人, 非亚裔,并将结果填充到会员分类中, 整合之后返回与提供数据一样的格式给我'},
-        {'role':'user', 'content':prompt.format(chunk=str(value))}
-    ]
-    if not proxy:
-        response = client.chat.completions.create(model='gpt-4',messages=message)
-        result_string = response.choices[0].message.content
-    else:
-        result_string = post_openai(message)
-    result = extract_list_from_string(result_string)
-    if result:
-        df = pd.DataFrame(result)
-        df.to_csv('会员列表.csv',mode='a', header=False, index=False)
-        # df_all = pd.concat([df_all, df], ignore_index=True)
-        print(df.info())
-        print(f'{name}处理完成, 请验收!')
-    else:
-        print(f'{name}出现问题啦, 请自行调试')
-
-df_origin = pd.read_excel('会员列表.xlsx')
-df_result = pd.read_csv('会员列表.csv')
-print(df_result['会员分类'].unique())
-df_result.to_excel('result.xlsx')
-df_result['会员分类'] = df_result['会员分类'].apply(lambda x : str(x).replace('Non-Chinese', '非华裔').replace('non-Chinese', '非华裔').replace('Chinese','华裔').replace('可能是华裔','华裔')\
-                                            .replace('可能华裔','华裔').replace('非华人','华裔').replace('华人','非华裔').replace('是','华裔').replace('否','非华裔'))
-df_origin_use = df_origin[['会员姓名']]
-df_result.to_excel('加分类的原始表.xlsx')
-print('*******************************')
-df_origin_use = df_origin_use.drop_duplicates(subset=['会员姓名'])
-
-print('*******************************')
-
-df_final = df_origin_use.merge(df_result, on='会员姓名', how='left')
-
-df_final = df_final.drop_duplicates(subset=['会员姓名'])
-
-df_final.to_excel('加分类并对原表对应.xlsx')

+ 171 - 0
name_classify_api.py

@@ -0,0 +1,171 @@
+import pandas as pd 
+import math, json, os
+import openai, re, ast, requests
+from fastapi import FastAPI, UploadFile, File, Form
+from pydantic import BaseModel
+from fastapi.responses import JSONResponse
+from datetime import datetime
+import uvicorn
+from tqdm import tqdm
+app = FastAPI()
class ClassificationRequest(BaseModel):
    """Request body for POST /classify/."""
    path: str          # server-side path to the input Excel workbook
    client_id: str     # per-user workspace name under ./process/
    one_key: str       # column paired with 'name' when de-duplicating the merged result
    name_column: str   # column holding the person's name to classify
    api_key: str       # caller-supplied OpenAI API key
    proxy: bool        # True -> use the proxy base URL instead of api.openai.com
    chunk_size: int    # rows per chunk sent to the model in one prompt
def save_dict_to_json(dictionary, filename):
    """Serialize *dictionary* to *filename* as pretty-printed UTF-8 JSON."""
    serialized = json.dumps(dictionary, ensure_ascii=False, indent=4)
    with open(filename, 'w', encoding='utf-8') as fp:
        fp.write(serialized)
+
def load_dict_from_json(filename):
    """Read *filename* (UTF-8 JSON) and return the parsed object."""
    with open(filename, encoding='utf-8') as fp:
        text = fp.read()
    return json.loads(text)
def split_dataframe_to_dict(df, chunk_size=100):
    """Split *df* into consecutive row chunks of at most *chunk_size* rows.

    Returns a dict mapping 'chunk_1', 'chunk_2', ... to each chunk's rows as
    a list of record dicts (the same layout that
    ``DataFrame.to_dict(orient='records')`` produces). An empty frame yields
    an empty dict.
    """
    total_rows = len(df)
    chunks = {}
    start = 0
    index = 0
    while start < total_rows:
        index += 1
        piece = df.iloc[start:start + chunk_size]
        chunks[f'chunk_{index}'] = piece.to_dict(orient='records')
        start += chunk_size
    return chunks
def extract_list_from_string(input_string):
    """Find the first bracketed list in *input_string* and parse it safely.

    Returns the parsed list, or None when no ``[...]`` span exists, the span
    is not valid Python literal syntax, or the parsed value is not a list.
    Diagnostic messages are printed for each failure mode.
    """
    match = re.search(r'\[.*?\]', input_string, re.DOTALL)
    if match is None:
        print("未找到列表结构")
        return None
    try:
        # literal_eval only accepts literals, so untrusted model output
        # cannot execute arbitrary code here.
        parsed = ast.literal_eval(match.group())
    except Exception as e:
        print(f"解析错误: {e}")
        return None
    if not isinstance(parsed, list):
        print("解析结果不是列表")
        return None
    return parsed
def post_openai(messages, api_key=None):
    """Send a GPT-4 chat-completion request through the proxy endpoint.

    Parameters
    ----------
    messages : list[dict]
        Chat messages in the OpenAI wire format
        (``[{'role': ..., 'content': ...}, ...]``).
    api_key : str, optional
        Bearer token for the proxy. Falls back to the ``OPENAI_PROXY_KEY``
        environment variable. SECURITY FIX: a key used to be hard-coded here
        and committed to version control — it must be treated as leaked and
        revoked.

    Returns
    -------
    str
        The assistant reply text of the first choice.
    """
    base_url = "https://fast.bemore.lol"
    key = api_key or os.environ.get("OPENAI_PROXY_KEY", "")
    payload = json.dumps({
        "model": "gpt-4",
        "messages": messages,
    })
    url = base_url + "/v1/chat/completions"
    headers = {
        'Accept': 'application/json',
        'Authorization': f'Bearer {key}',
        'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
        'Content-Type': 'application/json',
    }

    response = requests.request("POST", url, headers=headers, data=payload)

    # Log the raw response object for debugging, then parse the JSON body.
    print(response)
    data = response.json()

    # Reply text of the first (and only requested) choice.
    return data['choices'][0]['message']['content']
+
@app.post("/uploadfile/")
async def create_upload_file(file: UploadFile = File(...), client_id: str = Form(...)):
    """Persist an uploaded file under ./process/<client_id>/.

    Returns JSON with the stored path on success, or HTTP 500 with the error
    message on failure.

    SECURITY FIX: the stored name is now the basename of the uploaded
    filename, which blocks path-traversal payloads such as
    '../../etc/passwd'. NOTE(review): `client_id` is also interpolated into
    the path and should be validated by the caller — confirm upstream.
    """
    user_directory = f'./process/{client_id}'
    if not os.path.exists(user_directory):
        os.makedirs(user_directory)
        # NOTE(review): 0o777 is world-writable; kept for compatibility with
        # the existing deployment, but consider tightening to 0o755.
        os.chmod(user_directory, 0o777)
    # basename() strips any directory components a malicious client sends.
    safe_name = os.path.basename(file.filename or '')
    if not safe_name:
        return JSONResponse(content={"message": "发生错误: 文件名无效"}, status_code=500)
    file_location = os.path.join(user_directory, safe_name)
    try:
        with open(file_location, "wb+") as file_object:
            file_object.write(file.file.read())
        os.chmod(file_location, 0o777)
        return JSONResponse(content={
            "message": f"文件 '{file.filename}' 上传成功",
            "client_id": client_id,
            "file_path": file_location
        }, status_code=200)
    except Exception as e:
        return JSONResponse(content={"message": f"发生错误: {str(e)}"}, status_code=500)
+    
@app.post("/classify/")
async def classify_data(request: ClassificationRequest):
    """Classify every name in the request's Excel file via GPT-4.

    The sheet is split into chunks of ``chunk_size`` rows; each chunk is sent
    to the model, the reply is parsed back into records and appended to a
    per-run temp CSV, and the accumulated results are finally merged onto the
    original sheet and written next to the input file as
    '<name>_classify<ext>'.
    """
    try:
        prompt = """提供的数据:{chunk}
                返回的数据:"""
        work_path = f'./process/{request.client_id}'
        os.makedirs(work_path, exist_ok=True)
        timestamp_str = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        df_origin = pd.read_excel(request.path)
        # Normalized working columns the prompt/merge logic relies on.
        df_origin['name'] = df_origin[request.name_column]
        df_origin['classify'] = ''
        df_use = df_origin[['name', 'classify']]
        deal_result = split_dataframe_to_dict(df_use, request.chunk_size)

        # Per-run scratch CSV that accumulates model output chunk by chunk.
        temp_csv = work_path + '/' + timestamp_str + 'output_temp.csv'
        final_file_name, final_file_extension = os.path.splitext(os.path.basename(request.path))
        final_file = final_file_name + '_classify' + final_file_extension
        new_file_path = os.path.join(os.path.dirname(request.path), final_file)

        if not request.proxy:
            print(f'用户{request.client_id}正在使用直连的gpt-API')
            client = openai.OpenAI(api_key=request.api_key, base_url='https://api.openai.com/v1')
        else:
            client = openai.OpenAI(api_key=request.api_key, base_url='https://fast.bemore.lol/v1')

        for name, value in tqdm(deal_result.items(), desc='Processing', unit='item'):
            try:
                message = [
                    {'role':'system', 'content': '你是一个名字判断专家,你需要根据提供的列表中的每一个字典元素的会员姓名,判断其名字分类,分别为3类: 亚裔华人,亚裔非华人, 非亚裔,并将结果填充到会员分类中, 整合之后返回与提供数据一样的格式给我'},
                    {'role':'user', 'content':prompt.format(chunk=str(value))}
                ]
                response = client.chat.completions.create(model='gpt-4', messages=message)
                result_string = response.choices[0].message.content
                result = extract_list_from_string(result_string)
                if not result:
                    continue  # chunk failed to parse; skip it rather than abort the run
                df_output = pd.DataFrame(result)
                # BUG FIX: the header must be written only once. The previous
                # code passed header=True on every append, interleaving header
                # rows into the data, which corrupted the later read_csv/merge.
                df_output.to_csv(temp_csv, mode='a',
                                 header=not os.path.exists(temp_csv), index=False)
            except Exception as e:
                print(f'{name}出现问题啦, 错误为:{e} 请自行调试')

        if os.path.exists(temp_csv):
            df_result = pd.read_csv(temp_csv)
            # Left-merge the model output back onto the full sheet, keeping
            # the first hit per (one_key, name) pair.
            df_final = (df_origin.merge(df_result, on='name', how='left')
                                 .drop_duplicates(subset=[request.one_key, 'name'], keep='first'))
            df_final.to_excel(new_file_path, index=False)
            return {"message": "分类完成", "output_file": new_file_path}
        return {"message": "文件没能处理成功"}
    except Exception as e:
        return {"message": f"处理出现错误: {e}"}
+
if __name__ == "__main__":
    # Serve the API on all interfaces, port 8070 (matches client.py's URL).
    uvicorn.run(app, host="0.0.0.0", port=8070)

+ 7 - 4
requirements.txt

@@ -1,4 +1,7 @@
-pandas
-openai
-requests
-openpyxl
+pandas
+openai
+requests
+openpyxl
+fastapi
+python-multipart
+uvicorn

+ 13 - 0
tools/merge_df.py

@@ -0,0 +1,13 @@
import pandas as pd

# One-off cleanup tool: de-duplicate the merged classification output on the
# (Name, name) pair and write the cleaned sheet back out.
df_final = pd.read_excel(r'E:\code\name_classify\output_final.xlsx')
# BUG FIX: drop_duplicates returns a new frame — the original call discarded
# its result, so the written file still contained every duplicate row.
df_final = df_final.drop_duplicates(subset=['Name', 'name'], keep='first')
df_final.to_excel('output_final_8.xlsx')

+ 32 - 0
tools/test_openai_key.py

@@ -0,0 +1,32 @@
+import openai
+import argparse
+
def get_openai_response(proxy=False, key=''):
    """Smoke-test an OpenAI API key.

    Builds a client against either the official endpoint or the proxy
    endpoint (when *proxy* is True), sends a fixed test conversation, and
    returns the model's reply text.
    """
    base_url = 'https://fast.bemore.lol/v1' if proxy else 'https://api.openai.com/v1'
    client = openai.OpenAI(api_key=key, base_url=base_url)

    # Fixed probe conversation; the reply content is irrelevant — any reply
    # proves the key works.
    conversation = [
        {'role': 'system', 'content': '你是一个名字判断专家,你需要根据提供的列表中的每一个字典元素的会员姓名,判断其是否为华裔的名字, 结果为 华裔 或者为 非华裔,并将结果填充到会员分类中, 整合之后返回与提供数据一样的格式给我'},
        {'role': 'user', 'content': '你好,你是谁'},
    ]

    response = client.chat.completions.create(model='gpt-4', messages=conversation)
    return response.choices[0].message.content
+
def main():
    """CLI entry point: parse --proxy/--key and print the probe response."""
    parser = argparse.ArgumentParser(description='OpenAI API Client')
    parser.add_argument('--proxy', action='store_true', help='Use proxy for API requests')
    parser.add_argument('--key', type=str, required=True, help='API key for OpenAI')
    args = parser.parse_args()
    print(get_openai_response(proxy=args.proxy, key=args.key))


if __name__ == '__main__':
    main()