12345678910111213 |
# Deduplicate the rows of output_final.xlsx on the ('Name', 'name') column
# pair and save the result as output_final_8.xlsx.
import pandas as pd

# Earlier steps, kept commented out for reference. They appear to build the
# merged table that is loaded below (TODO confirm against how
# output_final.xlsx was actually produced).
# NOTE(review): in the disabled replace chain, '华裔' reads as "of Chinese
# descent" and '非华裔' as "not of Chinese descent"; the '非华人' -> '华裔' and
# '华人' -> '非华裔' steps look inverted -- verify before re-enabling.
# path = r'E:\code\name_classify\data_before\Shopify7月订单-客户数据.xlsx'
# df_origin = pd.read_excel(path)
# df_result = pd.read_csv(r'E:\code\name_classify\data_before\result_1.csv')
# # df_result['classify'] = df_result['v'].apply(lambda x : str(x).replace('Non-Chinese', '非华裔').replace('non-Chinese', '非华裔').replace('Chinese','华裔').replace('可能是华裔','华裔')\
# # .replace('可能华裔','华裔').replace('非华人','华裔').replace('华人','非华裔').replace('是','华裔').replace('否','非华裔'))
# df_final = df_origin.merge(df_result, on='name', how='left')
# df_final = df_origin.merge(df_result, on='name', how='left').drop_duplicates(subset=['Name','name'], keep='first')

df_final = pd.read_excel(r'E:\code\name_classify\output_final.xlsx')

# drop_duplicates returns a new DataFrame and leaves the original untouched,
# so the result must be assigned back; otherwise the file written below would
# still contain every duplicate row.
df_final = df_final.drop_duplicates(subset=['Name', 'name'], keep='first')

# NOTE(review): to_excel writes the row index as an extra column by default;
# pass index=False if that column is not wanted -- confirm with consumers.
df_final.to_excel('output_final_8.xlsx')
|