# order_fusion_services.py
  1. import os
  2. import time
  3. import uuid
  4. import re
  5. from typing import Literal
  6. from config.prompt import INTENT_PROMPT, ANSWER_PROMPT
  7. from utils.tools import read_json_file, save_dict_to_json, add_suffix, extract_fields, string_to_json
  8. from utils.post_data import upload_file_to_tos
  9. from utils.logger_config import setup_logger
  10. from utils.qa_robot import text_qa, intent_reg, large_order_qa
  11. from modules.filter_goods import filter_goods
  12. from modules.get_image import get_sku_image
  13. from modules.export_excel import export_to_excel, batch_export_to_excel
  14. from modules.query_db import get_sku_info, get_sku_id, get_core_sku_freq, get_fusion_order, get_sales
  15. logger = setup_logger(__name__)
  16. class LargeOrderService:
  17. """
  18. 大单匹配业务逻辑服务
  19. """
  20. def __init__(self, output_path="./output/"):
  21. self.output_path = output_path
  22. self.intent_prompt = INTENT_PROMPT
  23. self.answer_prompt = ANSWER_PROMPT
  24. def check_file_exists(self, directory, filename):
  25. """
  26. 检查特定目录下是否存在指定文件。
  27. :param directory: 目录路径
  28. :param filename: 文件名
  29. :return: 如果文件存在返回 True,否则返回 False
  30. """
  31. file_path = os.path.join(directory, filename)
  32. return os.path.isfile(file_path)
  33. def has_alphanumeric_mix(self, text):
  34. """判断字符串中是否包含字母和数字混合的子字符串"""
  35. pattern = r'(?=.*[a-zA-Z])(?=.*\d)'
  36. return bool(re.search(pattern, text))
  37. def has_number_isdigit(self, text):
  38. """使用isdigit()方法检查字符串中是否包含数字"""
  39. return any(char.isdigit() for char in text)
  40. # 根据SKC查询商品信息
  41. def get_skc_info(self, skc, json_file, get_fields):
  42. """
  43. 根据skc值从JSON文件中获取指定多个字段的信息
  44. 参数:
  45. skc: 要匹配的skc值
  46. json_file: JSON文件路径
  47. get_fields (list): 需要获取的字段名列表(如 ["price", "name"])
  48. 返回:
  49. dict: 包含所有指定字段的字典(字段不存在时值为None);若未找到skc则返回None
  50. """
  51. # 读取JSON文件信息
  52. file_info = read_json_file(json_file)
  53. # 定义所有需要遍历的goods_info路径(扁平化处理)
  54. goods_info_sources = [
  55. [file_info["primary_goods_info"]["goods_info"]], # 单个字典的goods_info
  56. [item["goods_info"] for item in file_info["combine_two_info"]], # 二级组合
  57. *[item["goods_info"] for item in file_info["combine_three_info"]] # 三级组合(展开嵌套列表)
  58. ]
  59. # 遍历所有数据源,匹配skc后提取指定字段
  60. for info_list in goods_info_sources:
  61. for item in info_list:
  62. if item.get("skc") == skc:
  63. # 提取get_fields列表中的所有字段,不存在则为None
  64. return {field: item.get(field) for field in get_fields}
  65. # 未找到匹配的skc时返回None
  66. return None
  67. # 获取主推款商品信息
  68. def get_primary_goods_info(self, primary_sku: str, start_date:str = None, end_date: str = None):
  69. # 1. 获取商品ID、颜色ID
  70. goods_code = primary_sku[:-3]
  71. color_code = primary_sku[-3:]
  72. combine_id = get_sku_id(goods_code, color_code)
  73. goods_id = combine_id[0]
  74. color_id = combine_id[1]
  75. # 2. 获取主推款频次
  76. result_freq_two = get_core_sku_freq(
  77. goods_code = goods_code,
  78. color_code = color_code,
  79. goods_id = goods_id,
  80. color_id = color_id,
  81. query_type = "22",
  82. start_date = start_date,
  83. end_date = end_date
  84. )
  85. result_freq_three = get_core_sku_freq(
  86. goods_code = goods_code,
  87. color_code = color_code,
  88. goods_id = goods_id,
  89. color_id = color_id,
  90. query_type = "33",
  91. start_date = start_date,
  92. end_date = end_date
  93. )
  94. # 3. 获取主推款商品信息
  95. result_info = get_sku_info(goods_id, color_id)
  96. result_image = get_sku_image(goods_code+color_code)
  97. logger.info(f"\nfreq22:{result_freq_two}\nfreq33:{result_freq_three}\ninfo:{result_info}")
  98. goods_info = {
  99. "sku": result_info[0],
  100. "skc": str(result_info[0]) + str(result_info[2]),
  101. "name": result_info[1],
  102. "color_code": result_info[2],
  103. "color": result_info[3],
  104. "season": result_info[4],
  105. "category": result_info[5],
  106. "price": float(result_info[6]),
  107. "image_url": result_image["image_url"],
  108. "image_path": result_image["image_path"]
  109. }
  110. return {
  111. "goods_info": goods_info,
  112. "goods_code": goods_code,
  113. "color_code": color_code,
  114. "goods_id": goods_id,
  115. "color_id": color_id,
  116. "freq_two": int(result_freq_two),
  117. "freq_three": int(result_freq_three)
  118. }
  119. # 获取连带款商品信息
  120. def get_sub_goods_info(self, fusion_order: list, combine_type: Literal["22", "33"]):
  121. if combine_type == "22":
  122. goods_color = fusion_order[0]
  123. goods_count = fusion_order[1]
  124. goods_id = goods_color.split('-')[0]
  125. color_id = goods_color.split('-')[1]
  126. result_info = get_sku_info(goods_id, color_id)
  127. goods_info = {
  128. "sku": result_info[0],
  129. "name": result_info[1],
  130. "color_code": result_info[2],
  131. "color": result_info[3],
  132. "season": result_info[4],
  133. "category": result_info[5],
  134. "price": float(result_info[6]),
  135. "goods_id": goods_id,
  136. "color_id": color_id,
  137. "skc": str(result_info[0]) + str(result_info[2])
  138. }
  139. return {
  140. "goods_info": goods_info,
  141. "freq": int(goods_count),
  142. "count": int(goods_count)*2
  143. }
  144. elif combine_type == "33":
  145. goods_color = fusion_order[:-1]
  146. goods_count = fusion_order[-1]
  147. goods_info = []
  148. for combine_id in goods_color:
  149. goods_id = combine_id.split('-')[0]
  150. color_id = combine_id.split('-')[1]
  151. result_info = get_sku_info(goods_id, color_id)
  152. result = {
  153. "sku": result_info[0],
  154. "name": result_info[1],
  155. "color_code": result_info[2],
  156. "color": result_info[3],
  157. "season": result_info[4],
  158. "category": result_info[5],
  159. "price": float(result_info[6]),
  160. "goods_id": goods_id,
  161. "color_id": color_id,
  162. "skc": str(result_info[0]) + str(result_info[2])
  163. }
  164. goods_info.append(result)
  165. return {
  166. "goods_info": goods_info,
  167. "freq": int(goods_count),
  168. "count": int(goods_count)*3
  169. }
  170. # 获取主推款&连带款
  171. def get_all_goods_info(self, primary_sku: str, start_date: str = None, end_date: str = None):
  172. logger.info(f"-----------------执行连带计算:{primary_sku}-----------------")
  173. # 1. 获取主商品信息
  174. primary_goods_info = self.get_primary_goods_info(primary_sku, start_date, end_date)
  175. # 2. 获取连带商品
  176. combine_results = {
  177. type_: get_fusion_order(
  178. goods_code = primary_goods_info["goods_code"],
  179. color_code = primary_goods_info["color_code"],
  180. goods_id = primary_goods_info["goods_id"],
  181. color_id = primary_goods_info["color_id"],
  182. query_type = type_,
  183. start_date = start_date,
  184. end_date = end_date
  185. )
  186. for type_ in ["22", "33"]
  187. }
  188. combine_two = combine_results["22"]
  189. combine_three = combine_results["33"]
  190. # 3. 获取连带商品信息
  191. combine_two_info = []
  192. for item in combine_two:
  193. item_info = self.get_sub_goods_info(item, '22')
  194. # 筛选22组合
  195. # can_combine = filter_goods.can_combine([primary_goods_info["goods_info"], item_info["goods_info"]])
  196. # if can_combine:
  197. # 筛选22组合-开始
  198. sales = get_sales(
  199. goods_code = primary_goods_info["goods_code"],
  200. color_code = primary_goods_info["color_code"],
  201. goods_id = item_info["goods_info"]["goods_id"],
  202. color_id = item_info["goods_info"]["color_id"],
  203. query_type = "22",
  204. sub_goods_id = None,
  205. sub_color_id = None,
  206. start_date = start_date,
  207. end_date = end_date
  208. )
  209. result_image = get_sku_image(item_info["goods_info"]["sku"]+item_info["goods_info"]["color_code"])
  210. item_info["goods_info"]["image_url"] = result_image["image_url"]
  211. item_info["goods_info"]["image_path"] = result_image["image_path"]
  212. item_info["main_freq"] = primary_goods_info["freq_two"]
  213. item_info["sales"] = sum([float(sale) for sale in sales])
  214. item_info["combine_rate"] = item_info["freq"] / primary_goods_info["freq_two"]
  215. combine_two_info.append(item_info)
  216. # else:
  217. # logger.info(f"不符合22组合逻辑!")
  218. combine_three_info = []
  219. for item in combine_three:
  220. item_info = self.get_sub_goods_info(item, '33')
  221. # 筛选33组合
  222. # can_combine = filter_goods.can_combine([primary_goods_info["goods_info"], item_info["goods_info"][0], item_info["goods_info"][1]])
  223. # if can_combine:
  224. # 筛选33组合
  225. sales = get_sales(
  226. goods_code = primary_goods_info["goods_code"],
  227. color_code = primary_goods_info["color_code"],
  228. goods_id = item_info["goods_info"][0]["goods_id"],
  229. color_id = item_info["goods_info"][0]["color_id"],
  230. query_type = "33",
  231. sub_goods_id = item_info["goods_info"][1]["goods_id"],
  232. sub_color_id = item_info["goods_info"][1]["color_id"],
  233. start_date = start_date,
  234. end_date = end_date
  235. )
  236. for item in item_info["goods_info"]:
  237. result_image = get_sku_image(item["sku"]+item["color_code"])
  238. item["image_url"] = result_image["image_url"]
  239. item["image_path"] = result_image["image_path"]
  240. item_info["main_freq"] = primary_goods_info["freq_three"]
  241. item_info["sales"] = sum([float(sale) for sale in sales])
  242. if primary_goods_info["freq_three"] == 0:
  243. logger.info(f"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx:除数不能为0")
  244. item_info["combine_rate"] = 0.0001
  245. else:
  246. item_info["combine_rate"] = item_info["freq"] / primary_goods_info["freq_three"]
  247. combine_three_info.append(item_info)
  248. # else:
  249. # logger.info(f"不符合33组合逻辑!")
  250. result = {
  251. "primary_goods_info": primary_goods_info,
  252. "combine_two_info": combine_two_info,
  253. "combine_three_info": combine_three_info
  254. }
  255. file_name = primary_goods_info["goods_code"] + primary_goods_info["color_code"] + '.json'
  256. file_path = os.path.join('./output', file_name)
  257. save_dict_to_json(result, file_path)
  258. logger.info(f"保存连带计算结果:{file_path}")
  259. # xlsx_file = export_to_excel(result)
  260. # xlsx_url = upload_file_to_tos(xlsx_file)
  261. return {
  262. "primary_sku": primary_sku,
  263. "json_file": file_path
  264. # "xlsx_file": xlsx_file,
  265. # "xlsx_url": xlsx_url
  266. }
  267. # 大单搭配
  268. def generate_outfit_orders(self, script_path):
  269. """
  270. 生成包含商品详情的搭配订单列表
  271. 参数:
  272. script_path (str): 存储搭配规则数据的JSON文件路径
  273. product_info_file (str): 存储商品详细信息的JSON文件路径
  274. 返回:
  275. dict: 搭配订单结果,格式为 {"outfit_orders": [搭配1, 搭配2, ...]}
  276. 其中每个搭配配是包含商品详情的字典列表
  277. """
  278. # 1. 读取搭配数据并提取核心商品信息
  279. try:
  280. outfit_data = read_json_file(script_path)
  281. except Exception as e:
  282. raise ValueError(f"读取搭配数据文件失败: {str(e)}") from e
  283. # 提取主推款商品基础信息
  284. primary_outfit = outfit_data.get("primary_goods_info", [])
  285. # 提取两套搭配中的商品基础信息
  286. two_outfits = outfit_data.get("combine_two_info", [])
  287. # 提取三套搭配中的商品基础信息
  288. three_outfits = outfit_data.get("combine_three_info", [])
  289. flatten = lambda lst: [item for sublist in lst for item in sublist]
  290. outfits_list = [
  291. [item.get("goods_info", {}) for item in [primary_outfit]],
  292. [item.get("goods_info", {}) for item in two_outfits],
  293. flatten([*[item.get("goods_info", {}) for item in three_outfits ]])
  294. ]
  295. # 筛选需要的核心字段(用于后续QA模型)
  296. target_core_fields = ["skc", "name", "color", "season", "category"]
  297. outfits_info_list = [extract_fields(item, target_core_fields) for item in outfits_list]
  298. outfits_info = {
  299. "主推款": outfits_info_list[0],
  300. "连带款": outfits_info_list[1] + outfits_info_list[2]
  301. }
  302. # 2. 通过QA模型生成搭配组合和理由
  303. try:
  304. qa_response = text_qa(str(outfits_info))
  305. parsed_qa_result = string_to_json(qa_response)
  306. logger.info(f"AI搭配结果:{parsed_qa_result}")
  307. except Exception as e:
  308. raise RuntimeError(f"生成或解析搭配结果失败: {str(e)}") from e
  309. # 安全获取搭配组合和对应理由(默认空列表避免报错)
  310. outfit_combinations = parsed_qa_result.get("outfit_combine", [])
  311. combination_reasons = parsed_qa_result.get("combine_reason", [])
  312. logger.info(f"outfit_combinations: \n{outfit_combinations}")
  313. logger.info(f"combination_reasons: \n{combination_reasons}")
  314. # 3. 为每个搭配组合补充商品详细信息(价格、图片路径等)
  315. outfit_orders = []
  316. for skc_combination, reason in zip(outfit_combinations, combination_reasons):
  317. # 构建单个搭配组合的完整信息
  318. detailed_combination = []
  319. for product_skc in skc_combination:
  320. # 获取商品详细字段(确保返回字典,避免None导致KeyError)
  321. product_details = self.get_skc_info(product_skc, script_path, ["price", "image_path"]) or {}
  322. # 补充SKC作为唯一标识
  323. product_details["skc"] = product_skc
  324. product_details["reason"] = reason
  325. logger.info(f"product_details:\n{product_details}")
  326. detailed_combination.append(product_details)
  327. outfit_orders.append(detailed_combination)
  328. outfit_data["outfit_orders"] = outfit_orders
  329. save_dict_to_json(outfit_data, script_path)
  330. return outfit_data
  331. # 主流程
  332. def pipeline(self, primary_sku: str, start_date: str = None, end_date: str = None):
  333. qa_sku = primary_sku.upper()
  334. process_success = False
  335. # 1. 计算TOP20连带款
  336. mid_result = self.get_all_goods_info(qa_sku, start_date, end_date)
  337. # 2. 执行大单搭配
  338. mid_result_json = mid_result.get("json_file")
  339. final_result = self.generate_outfit_orders(mid_result_json)
  340. # 3. 搭配结果写入表格
  341. try:
  342. xlsx_file = export_to_excel(final_result)
  343. # 4. 搭配结果上传云端
  344. xlsx_url = upload_file_to_tos(xlsx_file)
  345. process_success = True
  346. logger.info(f"成功完成主推款搭配:{qa_sku}")
  347. except Exception as e:
  348. process_success = False
  349. logger.info(f"主推款搭配组合失败:{qa_sku}, 报错:{e}")
  350. # 5. 构建QA内容返回
  351. result = {
  352. "primary_sku": mid_result.get("primary_sku"),
  353. "json_file": mid_result_json,
  354. "xlsx_file": xlsx_file,
  355. "xlsx_url": xlsx_url,
  356. "success": process_success
  357. }
  358. # 6. 构建QA知识库
  359. qa_file = add_suffix(mid_result_json, "_qa")
  360. qa_content = {
  361. "主推款": mid_result.get("primary_sku"),
  362. "结果链接": xlsx_url,
  363. "是否成功": process_success
  364. }
  365. save_dict_to_json(qa_content, qa_file)
  366. return result
  367. def large_order_robot(self, user_query):
  368. # 意图识别
  369. intent_result = intent_reg(user_query)
  370. intent_json = string_to_json(intent_result)
  371. # 提取意图识别结果
  372. query_intent = intent_json.get("意图类型")
  373. query_sku = intent_json.get("咨询款号")
  374. query_time = intent_json.get("咨询时间")
  375. # 非咨询服装搭配
  376. if query_sku == None or not self.has_alphanumeric_mix(str(query_sku)):
  377. logger.info(f"一般性问题,快速生成答复中!")
  378. response = large_order_qa(user_query, query_intent)
  379. # 咨询服装搭配
  380. else:
  381. excel_file = f"{uuid.uuid4()}.xlsx"
  382. # 指定查询时间:计算所有查询
  383. if query_time and self.has_number_isdigit(str(query_time)):
  384. start_date = query_time.split('_')[0]
  385. end_date = query_time.split('_')[-1]
  386. logger.info(f"基于指定查询时间:{query_time},执行对:{str(query_sku)} 的统计计算!")
  387. for primary_sku in query_sku:
  388. self.pipeline(primary_sku, start_date, end_date)
  389. # 未指定查询时间:计算未统计的
  390. else:
  391. exist_dict = {sku: self.check_file_exists(directory="./output", filename=f"{sku}.xlsx") for sku in query_sku}
  392. failed_sku = [sku for sku, exists in exist_dict.items() if not exists]
  393. if len(failed_sku) == 0:
  394. logger.info(f"查询款号统计结果已预生成!")
  395. for primary_sku in failed_sku:
  396. logger.info(f"基于当前一个月时间,执行对:{str(failed_sku)} 的统计计算!")
  397. self.pipeline(primary_sku)
  398. logger.info(f"数据上传中,请稍等!")
  399. excel_path = batch_export_to_excel(query_sku, excel_file)
  400. excel_url = upload_file_to_tos(excel_path)
  401. response = large_order_qa(user_query, query_intent, ANSWER_PROMPT.format(query=str(query_sku), answer=excel_url))
  402. return response
  403. large_order_service = LargeOrderService()
  404. if __name__ == "__main__":
  405. large_order_service = LargeOrderService()
  406. # query = "请给出1B9L6E04086Y、1ACLAB10A86Y在七月十六到八月十六期间的大单组合结果"
  407. # # query = "你可以为我做些什么呢?"
  408. # response = large_order_service.large_order_robot(query)
  409. # print(response)
  410. priamry_sku = ["1ECRAC07007X", "1CNC6N23008H", "1ECR6J0Z011R", "10NL5J89032H", "1A9L1A20086Y", "1E9CCD23005W"] # "1ECR6J0Z011R", "1ENR8B33005W"
  411. # 1ECRAC07007X 1CNC6N23008H 1ECR6J0Z011R 10CL6E57038H 1ENC8B17005W 10NL5J89032H 1A9L1A20086Y 1E9CCD23005W
  412. for sku in priamry_sku:
  413. try:
  414. result = large_order_service.pipeline(sku, "2025-11-03", "2025-11-09")
  415. print(result)
  416. except Exception as e:
  417. logger.error(f"Error processing {sku}: {e}")
  418. # logger.info(result["primary_goods_info"])
  419. # two = result["combine_two_info"]
  420. # for item in two:
  421. # logger.info(item)
  422. # three = result["combine_three_info"]
  423. # for item in three:
  424. # logger.info(item)