"""Classify member names with an OpenAI chat model and export the results.

Workflow (see ``main``):

1. Read the member list from ``会员列表.xlsx``, split it into chunks of records
   and persist the chunks to ``save.json`` (or, when ``next_iter`` is true,
   reload the chunks saved by a previous run).
2. Send every chunk to the chat model and append the returned rows to
   ``会员列表.csv``.
3. Normalise the returned category labels and write the Excel reports.

API keys are read from the environment instead of being stored in the source:

* ``OPENAI_API_KEY``       - key used when ``proxy`` is false.
* ``OPENAI_PROXY_API_KEY`` - key used when ``proxy`` is true.
"""

import ast
import json
import math
import os
import re

import openai
import pandas as pd
import requests

# --- Configuration ---------------------------------------------------------

OPENAI_BASE_URL = 'https://api.openai.com/v1'
PROXY_BASE_URL = 'https://fast.bemore.lol'
OPENAI_API_KEY_ENV = 'OPENAI_API_KEY'
PROXY_API_KEY_ENV = 'OPENAI_PROXY_API_KEY'
MODEL_NAME = 'gpt-4'
# Seconds to wait for the proxy endpoint before giving up.
REQUEST_TIMEOUT = 300

SOURCE_XLSX = '会员列表.xlsx'
RESULT_CSV = '会员列表.csv'
NAME_COLUMN = '会员姓名'
CATEGORY_COLUMN = '会员分类'
RESULT_COLUMNS = [NAME_COLUMN, CATEGORY_COLUMN]

SYSTEM_PROMPT = '你是一个名字判断专家,你需要根据提供的列表中的每一个字典元素的会员姓名,判断其名字分类,分别为3类: 亚裔华人,亚裔非华人, 非亚裔,并将结果填充到会员分类中, 整合之后返回与提供数据一样的格式给我'
prompt = """提供的数据:{chunk} 返回的数据:"""

# True: call the proxy endpoint through ``post_openai``.
# False: call api.openai.com through the official client.
proxy = False
# True: resume from ``save.json`` instead of re-reading the Excel source.
next_iter = False
# Chunk keys (e.g. 'chunk_3') to skip, typically those already processed.
keys_to_remove = []

# Created on first use by ``get_client`` so that importing this module (or
# running in proxy mode) does not require the OpenAI key to be configured.
client = None

# Ordered substring replacements that collapse the label spellings the model
# may return. Order matters: the longer "non-Chinese" / "非华人" forms must be
# rewritten before the shorter "Chinese" / "华人" forms they contain.
_CATEGORY_REPLACEMENTS = (
    ('Non-Chinese', '非华裔'),
    ('non-Chinese', '非华裔'),
    ('Chinese', '华裔'),
    ('可能是华裔', '华裔'),
    ('可能华裔', '华裔'),
    ('非华人', '非华裔'),
    ('华人', '华裔'),
    ('是', '华裔'),
    ('否', '非华裔'),
)

_GREEDY_LIST_RE = re.compile(r'\[.*\]', re.DOTALL)
_LAZY_LIST_RE = re.compile(r'\[.*?\]', re.DOTALL)


def _require_env(name):
    """Return the value of environment variable *name* or raise RuntimeError."""
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            f'Environment variable {name} must be set to an API key'
        )
    return value


def get_client():
    """Return the shared OpenAI client, creating it on first use.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is not set.
    """
    global client
    if client is None:
        client = openai.OpenAI(
            api_key=_require_env(OPENAI_API_KEY_ENV),
            base_url=OPENAI_BASE_URL,
        )
    return client


def save_dict_to_json(dictionary, filename):
    """Write *dictionary* to *filename* as indented UTF-8 JSON."""
    with open(filename, 'w', encoding='utf-8') as file:
        json.dump(dictionary, file, ensure_ascii=False, indent=4)


def load_dict_from_json(filename):
    """Load and return the JSON document stored in *filename*."""
    with open(filename, 'r', encoding='utf-8') as file:
        return json.load(file)


def split_dataframe_to_dict(df, chunk_size=100):
    """Split *df* into consecutive chunks of at most *chunk_size* rows.

    Returns:
        A dict mapping ``'chunk_1'``, ``'chunk_2'``, ... to the chunk's rows
        as a list of record dicts (``DataFrame.to_dict(orient='records')``).

    Raises:
        ValueError: if *chunk_size* is not positive.
    """
    if chunk_size <= 0:
        raise ValueError('chunk_size must be a positive integer')
    num_chunks = math.ceil(len(df) / chunk_size)
    result_dict = {}
    for i in range(num_chunks):
        start = i * chunk_size
        # iloc clamps the end of the slice, so the last chunk may be shorter.
        chunk = df.iloc[start:start + chunk_size]
        result_dict[f'chunk_{i+1}'] = chunk.to_dict(orient='records')
    return result_dict


def _parse_literal(text):
    """Parse *text* as a Python literal, falling back to JSON.

    Raises:
        ValueError: if neither parser accepts *text*; the message is the
            error reported by ``ast.literal_eval``.
    """
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError,
            MemoryError, RecursionError) as literal_error:
        try:
            # The model sometimes answers in JSON (null/true/false), which
            # literal_eval rejects.
            return json.loads(text)
        except (ValueError, RecursionError):
            raise ValueError(str(literal_error)) from literal_error


def extract_list_from_string(input_string):
    """Extract the first list literal embedded in *input_string*.

    The widest ``[...]`` span is tried first so that lists containing nested
    brackets are not truncated; the narrowest span is used as a fallback when
    unrelated brackets follow the list.

    Returns:
        The parsed list, or ``None`` (after printing the reason) when no list
        can be found or parsed.
    """
    if not input_string:
        print("未找到列表结构")
        return None

    candidates = []
    for pattern in (_GREEDY_LIST_RE, _LAZY_LIST_RE):
        match = pattern.search(input_string)
        if match and match.group() not in candidates:
            candidates.append(match.group())
    if not candidates:
        print("未找到列表结构")
        return None

    failure = "解析结果不是列表"
    for candidate in candidates:
        try:
            parsed = _parse_literal(candidate)
        except ValueError as e:
            failure = f"解析错误: {e}"
            continue
        if isinstance(parsed, list):
            return parsed
        failure = "解析结果不是列表"
    print(failure)
    return None


def post_openai(messages):
    """Send *messages* to the proxy chat-completions endpoint.

    Returns:
        The ``content`` of the first choice in the response.

    Raises:
        RuntimeError: if ``OPENAI_PROXY_API_KEY`` is not set.
        requests.RequestException: on network errors, timeouts or a non-2xx
            HTTP status.
    """
    url = PROXY_BASE_URL + "/v1/chat/completions"
    headers = {
        'Accept': 'application/json',
        'Authorization': f'Bearer {_require_env(PROXY_API_KEY_ENV)}',
        'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
        'Content-Type': 'application/json',
    }
    payload = json.dumps({
        "model": MODEL_NAME,
        "messages": messages,
    })
    response = requests.post(
        url, headers=headers, data=payload, timeout=REQUEST_TIMEOUT
    )
    print(response)
    # Fail with the HTTP error instead of an opaque KeyError on 'choices'.
    response.raise_for_status()
    data = response.json()
    return data['choices'][0]['message']['content']


def request_classification(records):
    """Ask the model to classify *records* and return its raw text answer."""
    message = [
        {'role': 'system', 'content': SYSTEM_PROMPT},
        {'role': 'user', 'content': prompt.format(chunk=str(records))},
    ]
    if proxy:
        return post_openai(message)
    response = get_client().chat.completions.create(
        model=MODEL_NAME, messages=message
    )
    return response.choices[0].message.content


def normalize_category(value):
    """Collapse the label spellings returned by the model.

    Missing values are returned unchanged; everything else is converted to
    ``str`` and passed through ``_CATEGORY_REPLACEMENTS`` in order.
    """
    if pd.isna(value):
        return value
    text = str(value)
    for old, new in _CATEGORY_REPLACEMENTS:
        text = text.replace(old, new)
    return text


def prepare_chunks():
    """Return the chunks to process, honouring ``next_iter`` and
    ``keys_to_remove``."""
    if next_iter:
        chunks = load_dict_from_json('save.json')
    else:
        df = pd.read_excel(SOURCE_XLSX)
        df[CATEGORY_COLUMN] = ''
        chunks = split_dataframe_to_dict(df)
        save_dict_to_json(chunks, 'save.json')
        # Start a fresh result file; appending here would leave rows (and a
        # second header line) from an earlier run in the output.
        pd.DataFrame(columns=RESULT_COLUMNS).to_csv(
            RESULT_CSV, mode='w', header=True, index=False
        )

    skipped = set(keys_to_remove)
    chunks = {k: v for k, v in chunks.items() if k not in skipped}
    save_dict_to_json(chunks, 'save_new.json')
    return chunks


def classify_chunks(chunks):
    """Classify every chunk and append the returned rows to the result CSV."""
    for name, records in chunks.items():
        print(name)
        result = extract_list_from_string(request_classification(records))
        if result:
            # Rows are appended without a header, so force the header's
            # column set and order regardless of what the model returned.
            frame = pd.DataFrame(result).reindex(columns=RESULT_COLUMNS)
            frame.to_csv(RESULT_CSV, mode='a', header=False, index=False)
            frame.info()
            print(f'{name}处理完成, 请验收!')
        else:
            print(f'{name}出现问题啦, 请自行调试')


def build_reports():
    """Write the raw, normalised and source-aligned Excel reports."""
    df_origin = pd.read_excel(SOURCE_XLSX)
    df_result = pd.read_csv(RESULT_CSV)
    print(df_result[CATEGORY_COLUMN].unique())
    # Raw model output, before any label normalisation.
    df_result.to_excel('result.xlsx')

    df_result[CATEGORY_COLUMN] = df_result[CATEGORY_COLUMN].apply(
        normalize_category
    )
    df_result.to_excel('加分类的原始表.xlsx')
    print('*******************************')
    df_origin_use = df_origin[[NAME_COLUMN]].drop_duplicates(
        subset=[NAME_COLUMN]
    )
    print('*******************************')
    # Left join keeps every distinct source name, classified or not.
    df_final = df_origin_use.merge(df_result, on=NAME_COLUMN, how='left')
    df_final = df_final.drop_duplicates(subset=[NAME_COLUMN])
    df_final.to_excel('加分类并对原表对应.xlsx')


def main():
    """Run the full pipeline: chunk, classify, report."""
    deal_result = prepare_chunks()
    print(deal_result.keys())
    classify_chunks(deal_result)
    build_reports()


if __name__ == "__main__":
    main()