浏览代码

create on local

Y 3 月之前
当前提交
3902a5166c

+ 133 - 0
pdf_parser.py

@@ -0,0 +1,133 @@
+import os
+import re
+import fitz
+import json
+import argparse
+import gradio as gr
+from tqdm import tqdm
+from utils.module import save_json
+from utils.pdf_extract import extract_text
+from utils.text_parser import content_extract
+
+def pdf2json(pdf_path):
+    """Extract one JSON file per product page of a PDF.
+
+    Every page whose text mentions an article number ("货号") or a style
+    number ("款号") is handed to ``content_extract``; the JSON string it
+    returns is parsed and stored as ``<货号>.json`` below
+    ``./pdf_json/<pdf basename>/``.
+
+    Args:
+        pdf_path: Path of the PDF file to parse.
+
+    Returns:
+        The output directory (string with a trailing slash).
+    """
+    save_path = "./pdf_json/" + os.path.splitext(os.path.basename(pdf_path))[0] + "/"
+    # makedirs also creates a missing "./pdf_json" parent and tolerates an
+    # already existing directory; a bare os.mkdir does neither.
+    os.makedirs(save_path, exist_ok=True)
+
+    # A single pattern covers both "货号" and "款号" (optional whitespace inside).
+    sku_pattern = re.compile(r'[货款]\s*号')
+    # The document is opened only to count pages; close it immediately so the
+    # file handle is not leaked.
+    doc = fitz.open(pdf_path)
+    try:
+        page_number = len(doc)
+    finally:
+        doc.close()
+    print(f"page number: {page_number}")
+    # NOTE(review): the range is kept as 0..page_number inclusive. Whether
+    # extract_text expects 0- or 1-based page indices is not visible here;
+    # one end of this range is probably invalid -- confirm and tighten.
+    for i in tqdm(range(page_number+1)):
+        text = extract_text(pdf_path, i)
+        if sku_pattern.search(text):
+            json_content = content_extract(text)
+            json_file = json.loads(json_content)
+            save_json(f'{save_path}{json_file["货号"]}.json', json_file)
+
+    return save_path
+
+
+def pdf2image(pdfPath, zoom_x=2, zoom_y=2, rotation_angle=0):
+    """Render each product page of a PDF to a PNG named after its 货号.
+
+    A page is rendered only when its text mentions "货号" or "款号". The
+    file name comes from the "货号" field of the JSON produced by
+    ``content_extract`` for that page.
+
+    Args:
+        pdfPath: Path of the PDF file.
+        zoom_x: Horizontal scale factor of the rendering matrix.
+        zoom_y: Vertical scale factor of the rendering matrix.
+        rotation_angle: Rotation applied to the rendering matrix.
+
+    Returns:
+        The image output directory (string with a trailing slash).
+    """
+    # Create the image output directory (including a missing parent).
+    imgPath = "./pdf_image/" + os.path.splitext(os.path.basename(pdfPath))[0] + "/"
+    os.makedirs(imgPath, exist_ok=True)
+
+    # Loop invariants: the page filter and the zoom/rotation matrix.
+    sku_pattern = re.compile(r'[货款]\s*号')
+    trans = fitz.Matrix(zoom_x, zoom_y).prerotate(rotation_angle)
+
+    pdf = fitz.open(pdfPath)
+    try:
+        # Read the PDF page by page.
+        for pg in tqdm(range(0, pdf.page_count)):
+            page = pdf[pg]
+            page_content = page.get_text()
+            if not sku_pattern.search(page_content):
+                continue
+            print(f"process: {pg}")
+            json_file = json.loads(content_extract(page_content))
+            pm = page.get_pixmap(matrix=trans, alpha=False)
+            # Use the public Pixmap.save API instead of the private
+            # _writeIMG helper, whose signature is not part of the API.
+            pm.save(imgPath + json_file["货号"] + ".png", output="png")
+    finally:
+        # Release the document even when a page fails to parse or render.
+        pdf.close()
+
+    return imgPath
+
+def add_image(json_file_path, image_url):
+    """Store an image reference under the "商品图像" key of a JSON file.
+
+    The file is read, updated in memory and rewritten in place as UTF-8
+    with a four-space indent and non-ASCII characters kept verbatim.
+    """
+    # Load the current record.
+    with open(json_file_path, 'r', encoding='utf-8') as source:
+        record = json.loads(source.read())
+
+    # Attach (or overwrite) the product-image field.
+    record['商品图像'] = image_url
+
+    # Persist the updated record back to the same file.
+    with open(json_file_path, 'w', encoding='utf-8') as target:
+        json.dump(record, target, ensure_ascii=False, indent=4)
+
+    print("已成功添加“商品图像”字段。")
+
+def add_info(json_folder, image_folder):
+    """Link every JSON record in *json_folder* to its rendered PNG.
+
+    For each entry of *json_folder* the matching image name is derived by
+    replacing "json" with "png". When that image exists in *image_folder*
+    the record receives the path the image will have under
+    ``./database/image``; otherwise the entry is counted as missing.
+    """
+    missing_count = 0
+    for entry in tqdm(os.listdir(json_folder)):
+        png_name = entry.replace("json","png")
+        local_png = os.path.join(image_folder, png_name)
+        if not os.path.exists(local_png):
+            missing_count += 1
+            print(f"图片文件不存在:{local_png}")
+            continue
+        database_png = os.path.join("./database/image", png_name)
+        print(f"正在处理:{database_png}")
+        add_image(os.path.join(json_folder, entry), database_png)
+    print(f"图片文件缺失数量:{missing_count}")
+
+def pdf2db(pdf_path):
+    """Parse an uploaded PDF and publish the results into ``./database``.
+
+    Runs ``pdf2json`` and ``pdf2image``, links each JSON record to its
+    image through ``add_info``, then copies the generated files into
+    ``./database/meta/`` and ``./database/image/``.
+
+    Args:
+        pdf_path: The uploaded file as delivered by the Gradio File input.
+
+    Returns:
+        A user-facing status message; errors are reported in the message
+        rather than raised.
+    """
+    import shutil  # local import: only this function needs it
+
+    # Nothing was uploaded.
+    if not pdf_path:
+        return "请先上传PDF文件"
+    try:
+        # Presumably some Gradio versions pass a temp-file wrapper exposing
+        # ``.name`` instead of a plain path; strings pass through unchanged.
+        pdf_path = getattr(pdf_path, "name", pdf_path)
+        json_folder = pdf2json(pdf_path)
+        image_folder = pdf2image(pdf_path)
+        add_info(json_folder, image_folder)
+        # Copy with shutil rather than a shell "cp": the folder names derive
+        # from the uploaded file name, so a shell string would break on
+        # spaces, allow command injection, and hide copy failures.
+        for src_dir, dst_dir in ((json_folder, "./database/meta/"),
+                                 (image_folder, "./database/image/")):
+            os.makedirs(dst_dir, exist_ok=True)
+            for name in os.listdir(src_dir):
+                src = os.path.join(src_dir, name)
+                if os.path.isfile(src):
+                    shutil.copy(src, dst_dir)
+        return "已完成PDF解析..."
+    except Exception as e:
+        return f"解析过程中发生错误:{str(e)}"
+
+# Build the standalone Gradio interface for the PDF parser.
+# NOTE: this runs at import time, so importing pdf2db from this module also
+# constructs the Blocks app below.
+with gr.Blocks(title="PDF解析器") as app:
+    gr.Markdown("## PDF文件解析工具")
+    gr.Markdown("上传PDF文件后点击解析按钮查看文本内容")
+    
+    with gr.Row():
+        # File upload component (a single .pdf file).
+        file_input = gr.File(
+            label="选择PDF文件",
+            file_types=[".pdf"],
+            file_count="single"
+        )
+        # Button that triggers parsing.
+        parse_btn = gr.Button("解析PDF", variant="primary")
+    
+    # Component that displays the status message returned by pdf2db.
+    text_output = gr.Textbox(
+        label="解析结果",
+        placeholder="解析后的文本将显示在此处...",
+        lines=20,
+        max_lines=50
+    )
+    
+    # Wire the button click to pdf2db: uploaded file in, status text out.
+    parse_btn.click(
+        fn=pdf2db,
+        inputs=file_input,
+        outputs=text_output
+    )
+
+# Script entry point: serve the app on all interfaces, port 1111.
+if __name__ == "__main__":
+    app.launch(server_name='0.0.0.0', server_port=1111)

+ 79 - 0
readme.md

@@ -0,0 +1,79 @@
+# SKU搜索系统
+
+这是一个基于Gradio构建的SKU(库存单位)搜索和管理系统,提供商品信息搜索、PDF文件解析、商品卖点生成等功能。
+
+## 功能特性
+
+- **商品信息搜索**:通过商品ID快速查询商品详细信息,包括商品图片、价格、颜色、成分、卖点和细节
+- **PDF文件解析**:支持上传PDF文件并解析其中的商品信息
+- **商品卖点生成**:基于商品ID和图片自动生成中英文卖点描述
+- **卖点标题生成**:根据商品ID和描述生成中英文标题
+
+## 系统要求
+
+- Python 3.x
+- 操作系统:Windows/Linux/MacOS
+
+## 安装步骤
+
+1. 克隆项目到本地:
+```bash
+git clone [项目地址]
+cd sku_search
+```
+
+2. 安装依赖包:
+```bash
+pip install -r requirements.txt
+```
+
+## 使用方法
+
+1. 启动服务:
+```bash
+python sku_search.py
+```
+
+## 主要功能说明
+
+### 商品信息搜索
+- 输入商品ID即可查询完整的商品信息
+- 支持查看商品图片、价格、颜色、成分等详细信息
+
+### PDF文件解析
+- 支持上传PDF文件
+- 自动解析PDF中的商品信息并保存到数据库
+
+### 商品卖点生成
+- 输入商品ID和图片(可选)
+- 自动生成中英文卖点描述
+- 提取三个关键卖点
+
+### 卖点标题生成
+- 输入商品ID和描述(可选)
+- 自动生成中英文标题
+
+## 项目结构
+
+```
+sku_search/
+├── database/          # 数据库文件
+├── pdf_image/         # PDF图片存储
+├── pdf_json/          # PDF解析结果
+├── static/            # 静态资源
+├── templates/         # 模板文件
+├── utils/             # 工具函数
+├── sku_search.py      # 主程序
+├── pdf_parser.py      # PDF解析模块
+└── requirements.txt   # 依赖包列表
+```
+
+## 注意事项
+
+- 确保数据库目录存在并有正确的访问权限
+- PDF文件解析需要确保文件格式正确
+- 商品ID需要与数据库中的记录匹配
+
+## 技术支持
+
+如有问题或建议,请联系项目维护者。

+ 5 - 0
requirements.txt

@@ -0,0 +1,5 @@
+volcenginesdkarkruntime
+pillow
+pandas
+PyMuPDF
+gradio

+ 111 - 0
sku_search.py

@@ -0,0 +1,111 @@
+import os
+import json
+import gradio as gr
+from utils.chat import process_sellpoint,process_title
+from pdf_parser import pdf2db
+
+def search_json_files(filename):
+    """Look up a product record in ``./database/meta`` by partial file name.
+
+    Every ``*.json`` file whose name contains *filename* is loaded; when
+    several match, the last one listed wins.
+
+    Args:
+        filename: Product ID (or a fragment of it) to search for.
+
+    Returns:
+        ``(image, price, color, ingredient, keywords, details)`` read from
+        the record, or ``(None, "暂无数据", ...)`` placeholders when nothing
+        matches, so the UI shows "no data" instead of crashing.
+    """
+    directory = "./database/meta"
+    # Start empty so a search without a match is detectable; previously the
+    # name was unbound and the return statement raised UnboundLocalError.
+    data = {}
+    for file in os.listdir(directory):
+        if file.endswith('.json') and filename in file:
+            # Records are written as UTF-8 with raw non-ASCII text, so read
+            # them with the same encoding regardless of the platform default.
+            with open(os.path.join(directory, file), 'r', encoding='utf-8') as f:
+                data = json.load(f)
+    if not data:
+        return None,"暂无数据","暂无数据","暂无数据","暂无数据","暂无数据"
+    return data["商品图像"], data["价格"], data["色号"], data["成分"], data["关键词"], data["商品细节"]
+
+def build_ui():
+    """Build the four-tab Gradio interface and return the Blocks object.
+
+    Tabs: product lookup (search_json_files), PDF parsing (pdf2db),
+    selling-point generation (process_sellpoint) and title generation
+    (process_title). Nothing is launched here; the caller does that.
+    """
+    with gr.Blocks() as demo:
+        # Tab 1: look up a stored product record by ID.
+        with gr.Tab("商品信息搜索"):
+            with gr.Column():
+                with gr.Row():
+                    with gr.Column():
+                        search_id = gr.Textbox(label="商品ID", value="1CNC6N230")
+                        id_image = gr.Image(label="商品图")
+                    with gr.Column():
+                        # The value= arguments below are sample placeholders shown before the first search.
+                        id_price = gr.Textbox(label="价格", value="¥1999")
+                        id_color = gr.Textbox(label="颜色", value="深花灰")
+                        id_ingredient = gr.Textbox(label="成分", value="羊毛")
+                        id_selling_point = gr.Textbox(label="商品卖点", value="落肩直身宽松行,一手长经典款初胎羊毛")
+                        id_details = gr.Textbox(label="商品细节", value="全羊毛双面呢,采用初剪细支羊毛精制而成,呢面绒毛均匀顺直,光泽温和高贵,手感细腻,挺括而富有弹性")
+            search_button = gr.Button("Search")
+            search_button.click(fn=search_json_files, inputs=[search_id], outputs=[id_image,id_price, id_color, id_ingredient, id_selling_point, id_details])
+        # Tab 2: upload a PDF and run the parsing pipeline.
+        with gr.Tab("PDF文件解析"):
+            with gr.Row():
+                file_input = gr.File(label="选择PDF文件", file_types=[".pdf"], file_count="single")
+                parse_btn = gr.Button("开始解析", variant="primary")
+            text_output = gr.Textbox(label="解析进度", placeholder="解析进度", lines=20, max_lines=50)
+            parse_btn.click(fn=pdf2db, inputs=file_input, outputs=text_output)
+        # Tab 3: generate selling-point copy; the right column echoes the stored record.
+        with gr.Tab("商品卖点生成"):
+            gr.Markdown("## 图库里若有商品图,可只填款号")
+            with gr.Row():
+                with gr.Column():
+                    # NOTE(review): this local name shadows the builtin id() inside build_ui.
+                    id = gr.Textbox(label="商品ID")
+                    input_image = gr.Image(label="商品图(选填)", type="pil")
+                    input_info = gr.Textbox(label="输入的卖点(选填)")
+                    output_ch_info = gr.Textbox(label="输出的中文卖点")
+                    output_en_info = gr.Textbox(label="输出的英文卖点")
+                    output_ky_info = gr.Textbox(label="输出的三个关键卖点")
+                with gr.Column():
+                    product_image = gr.Image(label="商品图")
+                    product_price = gr.Textbox(label="价格")
+                    product_color = gr.Textbox(label="颜色")
+                    product_ingredient = gr.Textbox(label="成分")
+                    product_selling_point = gr.Textbox(label="商品卖点")
+                    product_details = gr.Textbox(label="商品细节")
+            sell_point_button = gr.Button("生成卖点")
+            # Output order must match the 9-tuple returned by process_sellpoint.
+            sell_point_button.click(fn=process_sellpoint, 
+                inputs=[id, input_image, input_info],
+                outputs=[output_ch_info, output_en_info, output_ky_info, 
+                        product_image, product_price, product_color, 
+                        product_ingredient, product_selling_point, product_details])
+        # Tab 4: generate English and Chinese titles.
+        with gr.Tab("生成卖点标题"):
+            gr.Markdown("## 图库里若有商品图,可只填款号")
+            with gr.Row():
+                with gr.Column():
+                    p_id = gr.Textbox(label="商品ID")
+
+                    # Rebinds input_info; the tab-3 click handler was already wired to the earlier textbox.
+                    input_info = gr.Textbox(label="输入商品描述(可选)")
+                    output_title_en = gr.Textbox(label="输出的英文标题")
+                    output_title_ch = gr.Textbox(label="输出的中文标题")
+                    sell_title_button = gr.Button("生成卖点")
+                    sell_title_button.click(fn=process_title, 
+                    inputs=[p_id,input_info],
+                    outputs=[output_title_en,output_title_ch])
+
+            
+    return demo
+
+# Script entry point: build the interface and serve it on all interfaces, port 3333.
+if __name__ == "__main__":
+    demo=build_ui()
+    demo.launch(server_name='0.0.0.0', server_port=3333)
+
+# from flask import Flask, render_template, request, redirect, url_for
+# import json
+# import os
+
+# app = Flask(__name__)
+
+# def add_product_to_json(product_data):
+#     """将商品信息添加到JSON文件"""
+#     json_file_path = './database/meta/1B1L9H030.json'  # 请根据实际情况调整路径
+#     with open(json_file_path, 'r+', encoding='utf-8') as file:
+#         data = json.load(file)
+#         data.append(product_data)  # 假设数据是一个列表
+#         file.seek(0)
+#         json.dump(data, file, ensure_ascii=False, indent=4)
+
+# @app.route('/', methods=['GET', 'POST'])
+# def index():
+#     if request.method == 'POST':
+#         product_data = {
+#             '商品名称': request.form['name'],
+#             '商品价格': request.form['price'],
+#             '商品描述': request.form['description'],
+#             '商品图像': request.form['image_url']
+#         }
+#         add_product_to_json(product_data)
+#         return redirect(url_for('index'))
+
+#     return render_template('index.html')
+
+# if __name__ == "__main__":
+#     app.run(debug=True)

+ 0 - 0
utils/utils/__init__.py


二进制
utils/utils/__pycache__/__init__.cpython-310.pyc


二进制
utils/utils/__pycache__/chat.cpython-310.pyc


二进制
utils/utils/__pycache__/chat_old.cpython-310.pyc


二进制
utils/utils/__pycache__/conf.cpython-310.pyc


二进制
utils/utils/__pycache__/doubao.cpython-310.pyc


二进制
utils/utils/__pycache__/llm.cpython-310.pyc


二进制
utils/utils/__pycache__/module.cpython-310.pyc


二进制
utils/utils/__pycache__/pdf_extract.cpython-310.pyc


二进制
utils/utils/__pycache__/process_meta.cpython-310.pyc


二进制
utils/utils/__pycache__/prompt.cpython-310.pyc


二进制
utils/utils/__pycache__/text_parser.cpython-310.pyc


+ 259 - 0
utils/utils/chat.py

@@ -0,0 +1,259 @@
+from io import BytesIO
+import sys
+
+import numpy as np
+import requests
+# print(sys.path)
+sys.path.append("/data/data/luosy/project/sku_search")
+import time
+import os
+from PIL import Image
+import uuid
+from  utils.prompt import *
+from utils.llm import *
+import json
+from utils.conf import *
+import re
+
+
+# Module-wide list of previously generated English openings, newest first;
+# read by get_history() and updated by add_history().
+history_list=[]
+
+# Maps a method name to the prompt-building callable looked up by
+# generate_text / generate_text_new / gen_title. The callables arrive through
+# the star imports above (presumably utils.prompt -- confirm).
+plugins = {
+    "ch_en_selling_points":get_ch_en_selling_points,
+    "en_ch_selling_points":get_en_ch_selling_points,
+    "ch_en_selling_title":get_ch_en_selling_title,
+    "en_ch_selling_points_his":get_en_ch_selling_points_his,
+    "TextControl_his":TextControl_his
+}
+
+def search_json_files(filename):
+    """Look up a product record in ``./database/meta`` by partial file name.
+
+    Every ``*.json`` file whose name contains *filename* is loaded; when
+    several match, the last one listed wins.
+
+    Returns:
+        ``(image, price, color, ingredient, keywords, details)`` from the
+        record, or ``(None, "暂无数据", ...)`` placeholders when nothing matches.
+    """
+    directory = "./database/meta"
+    data = {}
+    for file in os.listdir(directory):
+        if file.endswith('.json') and filename in file:
+            # Records are written as UTF-8 with raw non-ASCII text; read them
+            # with the same encoding instead of the platform default.
+            with open(os.path.join(directory, file), 'r', encoding='utf-8') as f:
+                data = json.load(f)
+    if not data:
+        return None,"暂无数据","暂无数据","暂无数据","暂无数据","暂无数据"
+    return data["商品图像"], data["价格"], data["色号"], data["成分"], data["关键词"], data["商品细节"]
+
+
+def is_valid_response(response_dict):
+    """
+    Tell whether a parsed response carries all three required fields.
+
+    Returns False when a field is missing or when the membership test itself
+    fails (for example when the argument is not a container).
+    """
+    wanted = ("服装描述(中文)", "服装描述(英文)", "3个关键点(英文)")
+    try:
+        for name in wanted:
+            if name not in response_dict:
+                return False
+        return True
+    except Exception:
+        return False
+
+
+
+def convert_to_dict(s):
+    """Parse a model reply into a Python object, tolerating Markdown fences.
+
+    Accepts plain JSON, JSON wrapped in a ``` or ```json fence (closed or
+    not), and JSON preceded by a bare ``json`` label. Only the wrapper is
+    removed; occurrences of "json" inside the payload stay untouched.
+
+    Args:
+        s: Raw reply text.
+
+    Returns:
+        The decoded JSON value.
+
+    Raises:
+        json.JSONDecodeError: If the remaining text is not valid JSON.
+    """
+    text = s.strip()
+    fenced = re.search(r"```(?:json)?(.*?)```", text, re.DOTALL | re.IGNORECASE)
+    if fenced:
+        # A complete fenced block: keep only its content.
+        text = fenced.group(1)
+    else:
+        # Unterminated fence or bare "json" label: strip just the wrapper at
+        # the very start and end of the reply.
+        text = re.sub(r"^```(?:json)?|^json|```$", "", text, flags=re.IGNORECASE)
+    return json.loads(text.strip())
+
+
+def extract_response_values(response_dict):
+    """
+    Pull the three copy fields out of a parsed response.
+    Args:
+        response_dict: Parsed response (a mapping offering ``.get``).
+    Returns:
+        tuple: (Chinese description, English description, key points);
+        a missing field yields "", and any failure yields ("", "", "").
+    """
+    field_names = ("服装描述(中文)", "服装描述(英文)", "3个关键点(英文)")
+    try:
+        cn_desc, en_desc, key_points = (response_dict.get(name, "") for name in field_names)
+    except Exception:
+        return "", "", ""
+    return cn_desc, en_desc, key_points
+
+# def read_image(im_data):
+#     out_image=None
+#     if isinstance(im_data, str):
+#         if im_data.startswith("http"):
+#             headers = {
+#                 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'
+#             }
+#             im_path = BytesIO(requests.get(im_data, headers=headers).content)
+#         else:
+#             im_path = im_data
+#         out_image = Image.open(im_path)
+#     elif isinstance(im_data, Image.Image):
+#         out_image = im_data
+#     elif isinstance(im_data, np.ndarray):
+#         out_image = Image.fromarray(im_data)
+
+#     return out_image  # make a batch of image, shape
+
+
+def format_history(strings, indent="    "):
+    """Render *strings* as a numbered list, one ``<indent><n>. <text>`` line each.
+
+    Numbering starts at 1 and every line ends with a newline; an empty input
+    yields an empty string.
+    """
+    numbered_lines = (
+        f"{indent}{position}. {entry}\n"
+        for position, entry in enumerate(strings, start=1)
+    )
+    return "".join(numbered_lines)
+
+def get_history():
+    """Return the stored history as a numbered text block ('' when empty)."""
+    global history_list
+    if not history_list:
+        return ''
+    return format_history(history_list)
+def add_history(input,max_num=20):
+    """Remember the first clause of *input* at the front of the history.
+
+    The text is cut at the first run of ASCII punctuation (, . ! ? ; :),
+    stripped and inserted at index 0; the history is then trimmed to at
+    most *max_num* entries.
+    """
+    global history_list
+    clauses = re.split(r'[,\.\!\?\;\:]+', input)
+    leading_clause = clauses[0].strip()
+    history_list.insert(0, leading_clause)
+    overflow = len(history_list) - max_num
+    if overflow > 0:
+        history_list = history_list[:max_num]
+
+
+def generate_text(plm_info,img,graphic_label=None,method="en_ch_selling_points_his",plat="ali",model_name="mm_tyqw"):
+    """Generate Chinese/English selling-point copy with a multimodal model.
+
+    Args:
+        plm_info: Product description text fed into the prompt.
+        img: Product image passed to the multimodal request.
+        graphic_label: Optional key points appended to *plm_info*.
+        method: Key into ``plugins`` selecting the prompt builder.
+        plat: "ali" selects the Ali credentials/models; anything else Doubao.
+        model_name: Key into the model table of the chosen platform.
+
+    Returns:
+        ``(chinese, english, key_points)``. All three are empty strings when
+        none of the three attempts produced a valid response.
+    """
+    his=get_history()
+    if graphic_label:
+        plm_info+="\n以下是该衣服的关键点:"+graphic_label
+    if plat=="ali":
+        key=ali_ky
+        model=ali_model[model_name]
+    else:
+        key=doubao_ky
+        model=doubao_model[model_name]
+    usrp = plugins[method](plm_info,his)
+    llm=llm_request(*key,model)
+
+    # Defaults keep the return well-defined when every attempt fails;
+    # previously that case raised UnboundLocalError.
+    ch,en,kw="","",""
+    for _ in range(3):
+        try:
+            result = llm.llm_mm_request(usrp,img)
+            result=convert_to_dict(result)
+            if is_valid_response(result):
+                ch,en,kw=extract_response_values(result)
+                # Remember the opening so later prompts can avoid repeating it.
+                add_history(en)
+                break
+        except Exception as e:
+            # Best-effort retry: report the failure and try again.
+            print(f"Attempt failed with error: {str(e)}")
+
+    return ch,en,kw
+
+
+def generate_text_new(plm_info,img,graphic_label=None,method="TextControl_his",plat="ali",model_name1="mm_tyqw",model_name2="text_qwen"):
+    """Two-stage copy generation: describe the image, then write the copy.
+
+    A multimodal model first describes the garment in *img*; a text model
+    then turns that description into the final copy using the prompt pair
+    built by ``plugins[method]``.
+
+    Args:
+        plm_info: Product description text.
+        img: Product image for the multimodal request.
+        graphic_label: Optional key points appended to *plm_info*.
+        method: Key into ``plugins`` selecting the prompt builder.
+        plat: "ali" selects the Ali credentials/models; anything else Doubao.
+        model_name1: Model-table key of the multimodal model.
+        model_name2: Model-table key of the text model.
+
+    Returns:
+        ``(chinese, english, key_points)``; empty strings when no attempt
+        produced a valid response.
+    """
+    his=get_history()
+    if graphic_label:
+        plm_info+="\n以下是该衣服的关键点:"+graphic_label
+    # NOTE(review): plm_info is built but not passed to any prompt below;
+    # only the image description (res1) is -- confirm this is intended.
+    if plat=="ali":
+        key=ali_ky
+        model1=ali_model[model_name1]
+        model2=ali_model[model_name2]
+    else:
+        key=doubao_ky
+        # Both names must be bound on this branch too; it used to bind only
+        # an unused "model" and then fail with NameError. The default model
+        # names are Ali keys, so Doubao callers must pass their own.
+        model1=doubao_model[model_name1]
+        model2=doubao_model[model_name2]
+    sim_prompt="如果图片里有文字的话,请结合图片里的衣服和文本信息进行描述一下衣服,如果图片是下半身就只描述下半身,如果是上半身就只描述上半身,全身就描述全身的衣服,除衣服之外的配饰不要描述,还要具体到衣服的风格"
+    llm_mm=llm_request(*key,model1)
+    llm_lm=llm_request(*key,model2)
+    res1 = llm_mm.llm_mm_request(sim_prompt,img)
+    print(res1)
+    sys_prompt,usr_prompt = plugins[method](res1,his)
+    # Defaults keep the return well-defined when every attempt fails.
+    ch,en,kw="","",""
+    for _ in range(3):
+        try:
+            result=llm_lm.llm_text_request(usr_prompt,sys_prompt)
+            result=convert_to_dict(result)
+            if is_valid_response(result):
+                ch,en,kw=extract_response_values(result)
+                add_history(en)
+                break
+        except Exception as e:
+            # Best-effort retry: report the failure and try again.
+            print(f"Attempt failed with error: {str(e)}")
+
+    return ch,en,kw
+def process_sellpoint(id,image=None,info=""):
+    """Generate selling-point copy for a product, enriched from the database.
+
+    When *id* has exactly 9 characters the stored record is looked up; its
+    detail text is prepended to *info* and its image is used when no image
+    was supplied.
+
+    Args:
+        id: Product ID typed by the user.
+        image: Optional product image.
+        info: Optional extra selling-point text.
+
+    Returns:
+        ``(chinese, english, key_points, image, price, color, ingredient,
+        selling_point, details)`` -- the generated copy followed by the
+        stored record fields (all None when no lookup was made).
+    """
+    if len(id) == 9:
+        id_image,id_price, id_color, id_ingredient, id_selling_point, id_details=search_json_files(id)
+        print(id_image,id_price, id_color, id_ingredient, id_selling_point, id_details)
+
+    else:
+        id_image,id_price, id_color, id_ingredient, id_selling_point, id_details=None,None,None,None,None,None
+
+    info=id_details+info if id_details else info
+    # Identity checks: "== None" on an image object delegates to the
+    # object's own __eq__, which is ambiguous for array-like inputs.
+    if image is None and id_image is not None:
+        image=Image.open(id_image)
+
+    ch_sen,en_sen,key_point=generate_text(info,image)
+    return ch_sen,en_sen,key_point,id_image,id_price, id_color, id_ingredient, id_selling_point, id_details
+
+
+def gen_title(info,tags=None,referencr_title=None,method="ch_en_selling_title",plat="ali",model_name="text_dsv3"):
+    """Generate an English and a Chinese title for a garment.
+
+    Args:
+        info: Product description used as the prompt basis.
+        tags: Optional iterable of key-point strings appended to *info*.
+        referencr_title: Optional sample title whose structure should be
+            imitated; appended to *info*.
+        method: Key into ``plugins`` selecting the prompt builder.
+        plat: "ali" selects the Ali credentials/models; anything else Doubao.
+        model_name: Key into the model table of the chosen platform.
+
+    Returns:
+        ``(english_title, chinese_title)``.
+    """
+    # Append instead of overwrite: tags used to replace the description, and
+    # the reference title used to replace both.
+    if tags:
+        tag_text="以下是该衣服的关键点:"+",".join(tags)
+        info=f"{info}\n{tag_text}" if info else tag_text
+    if referencr_title:
+        info=(info or "")+"\n请以这条标题样例的结构作为借鉴来写这条标题:"+referencr_title
+    sysp,usrp = plugins[method](info)
+    if plat=="ali":
+        key=ali_ky
+        model=ali_model[model_name]
+    else:
+        key=doubao_ky
+        model=doubao_model[model_name]
+
+    llm=llm_request(*key,model)
+    res=llm.llm_text_request(usrp,sysp)
+    res_dict = json.loads(res)
+    # NOTE(review): "en_tile" looks like a typo, but it must match the key
+    # the prompt asks the model to emit -- confirm against the prompt text.
+    return res_dict["en_tile"],res_dict["ch_title"]
+
+
+def process_title(id,info=None,method="ch_en_selling_title"):
+    """Produce (english_title, chinese_title) for a product.
+
+    A 9-character *id* triggers selling-point generation and its English
+    copy becomes the title basis; otherwise the user-supplied *info* is used.
+    """
+    basis = process_sellpoint(id)[1] if len(id) == 9 else None
+    if basis is None and info is not None:
+        basis = info
+    title_en, title_ch = gen_title(basis)
+    return title_en, title_ch
+# Manual smoke test: runs one generation against a developer-local image path.
+if __name__ == "__main__":
+    print(process_sellpoint("","/data/data/luosy/project/sku_search/temp_img/企业微信截图_17439887354277.png","1、16mm平纹香云纱;2、香蕉裤型;3、裤身立体收省;4、后腰包橡筋;5、两侧插袋;两边插袋里面有网布贴袋,一大一小,大的可套手机,小的可套耳机或卡片;6、真门牌,大溪地贝壳扣"))
+    # print(process_sellpoint("1E2L1D290"))
+
+#     from PIL import Image
+#     img1=Image.open("/data/data/luosy/project/sku_search/temp_img/企业微信截图_17372766091671.png")
+
+#     ch_sen,en_sen,key_point,id_image,id_price, id_color, id_ingredient, id_selling_point, id_details=generate_text("",img1,"""-With elastic waistband
+# -With hairband
+# -X-line fit
+# 1.腰部橡筋 2.袖子橡
+# 筋 3.前中绳子可调
+# 节大小""")
+#     print(len(en_sen),end=" ")
+#     print(ch_sen,en_sen,key_point)
+#     ###############################
+#     img2=Image.open("/data/data/luosy/project/sku_search/temp_img/企业微信截图_17389065463149[1](1).png")
+
+#     ch_sen,en_sen,key_point,id_image,id_price, id_color, id_ingredient, id_selling_point, id_details=generate_text("",img2,"""-Washable wool
+# -Unisex
+# -With silver threads
+# 1.后中开衩;2.双扣可调节袖袢;3.暗门筒设计,天然果实扣;4.可水洗羊毛含银葱人字纹面料;5.里面左右两侧均有内袋,左侧最外层内袋是手机袋,防丢失""")
+#     print(len(en_sen),end=" ")
+#     print(ch_sen,en_sen,key_point)
+#     ###############################
+#     img3=Image.open("/data/data/luosy/project/sku_search/temp_img/企业微信截图_17392379937637.png")
+
+#     ch_sen,en_sen,key_point,id_image,id_price, id_color, id_ingredient, id_selling_point, id_details=generate_text("",img3,"""-Acetate
+# -With pockets
+# -Workwear
+# 1.描述二醋酸面料:2.扣子为镶钻布包扣;3.半裙后腰包橡筋;4.半裙有
+# 侧插袋;5.半裙有侧开隐形拉链,这是两件套套装""")
+#     print(len(en_sen),end=" ")
+#     print(ch_sen,en_sen,key_point)

+ 273 - 0
utils/utils/chat_old1.py

@@ -0,0 +1,273 @@
+import io
+import base64
+import requests
+import time
+import os
+import tos
+from PIL import Image
+from tos import HttpMethodType
+from volcenginesdkarkruntime import Ark
+import uuid
+from  utils.conf import *
+import json
+# 豆包的处理
+def get_intent(user_prompt):
+    """Ask the text model to shorten English copy to roughly 150-200 characters.
+
+    Args:
+        user_prompt: English text to condense.
+
+    Returns:
+        The model's reply text.
+    """
+    sy_prompt="你是一个文案总结高手,请对用户输入的文案进行简化,并返回结果,不要对内容进行拓展"
+    prompt1=f"""请对下面的英文进行缩短简化,简化后的英文字符数量必须在150-200之间(包括标点符号,请记住不是单词数量,是每个字符数)。请直接返回结果,不要输出其他内容。内容如下:{user_prompt}"""
+    # utils.conf exposes no module-level "client"; use the configured text
+    # client (the bare name raised NameError).
+    completion = LMConfig.lm_client.chat.completions.create(
+        messages = [
+            {"role": "system", "content": sy_prompt},
+            {"role": "user", "content": prompt1},
+        ],
+        model="ep-20241018084532-cgm84",
+    )
+
+    return completion.choices[0].message.content
+
+def search_json_files(filename):
+    """Look up a product record in ``./database/meta`` by partial file name.
+
+    Every ``*.json`` file whose name contains *filename* is loaded; when
+    several match, the last one listed wins.
+
+    Returns:
+        ``(image, price, color, ingredient, keywords, details)`` from the
+        record, or ``(None, "暂无数据", ...)`` placeholders when nothing matches.
+    """
+    directory = "./database/meta"
+    data = {}
+    for file in os.listdir(directory):
+        if file.endswith('.json') and filename in file:
+            # Records are written as UTF-8 with raw non-ASCII text; read them
+            # with the same encoding instead of the platform default.
+            with open(os.path.join(directory, file), 'r', encoding='utf-8') as f:
+                data = json.load(f)
+    if not data:
+        return None,"暂无数据","暂无数据","暂无数据","暂无数据","暂无数据"
+    return data["商品图像"], data["价格"], data["色号"], data["成分"], data["关键词"], data["商品细节"]
+
+def compress_image(input_path, output_path):
+    """Re-save an image at decreasing quality until it fits the size budget.
+
+    Args:
+        input_path: Path of the source image.
+        output_path: Path the compressed image is written to.
+
+    Returns:
+        The last quality value tried (may be <= 0 when the budget could not
+        be met before the quality ran out).
+    """
+    # The budget lives on MMMConfig; the bare name "target_size" is not
+    # exported by utils.conf and raised NameError.
+    target_size = MMMConfig.target_size
+    img = Image.open(input_path)
+    current_size = os.path.getsize(input_path)
+    # Rough first guess proportional to the required shrink factor, clamped
+    # to 1..100 (files already under budget used to yield values above 100).
+    image_quality = max(1, min(100, int(target_size / current_size * 100)))
+    img.save(output_path, optimize=True, quality=image_quality)
+    # Still too large: keep lowering the quality in steps of 10.
+    while os.path.getsize(output_path) > target_size:
+        img = Image.open(output_path)
+        image_quality -= 10
+        if image_quality <= 0:
+            break
+        img.save(output_path, optimize=True, quality=image_quality)
+    return image_quality
+
+def upload_tos(filename, tos_bucket_name, tos_object_key):
+    """Upload a local file to TOS and return a pre-signed GET URL for it.
+
+    Args:
+        filename: Full local path of the (compressed) image.
+        tos_bucket_name: Target bucket.
+        tos_object_key: Object key to store the file under.
+
+    Returns:
+        The pre-signed URL result for the uploaded object.
+
+    Raises:
+        Exception: Any upload/signing error is logged and re-raised.
+    """
+    # The clients are configured on MMMConfig; "get_tos_client" is not
+    # defined anywhere this module imports from (NameError).
+    tos_client, inner_tos_client = MMMConfig.tos_client, MMMConfig.inner_tos_client
+    try:
+        # Upload the local file to the target bucket.
+        tos_client.put_object_from_file(tos_bucket_name, tos_object_key, filename)
+        # Pre-signed URL for reading the uploaded object.
+        return inner_tos_client.pre_signed_url(HttpMethodType.Http_Method_Get, tos_bucket_name, tos_object_key)
+    except Exception as e:
+        if isinstance(e, tos.exceptions.TosClientError):
+            # Client-side failure: usually invalid parameters or network trouble.
+            print('fail with client error, message:{}, cause: {}'.format(e.message, e.cause))
+        elif isinstance(e, tos.exceptions.TosServerError):
+            # Server-side failure: details are carried by the error itself.
+            print('fail with server error, code: {}'.format(e.code))
+            # The request id pinpoints the failing call; keep it in the logs.
+            print('error with request id: {}'.format(e.request_id))
+            print('error with message: {}'.format(e.message))
+            print('error with http code: {}'.format(e.status_code))
+
+        else:
+
+            print('fail with unknown error: {}'.format(e))
+
+        # Bare raise keeps the original traceback intact.
+        raise
+# def get_system_prompt(info ):   
+#     system_prompt = """
+#             # 任务:欧美独立站服装卖点文案创作
+#                 作为一名在欧美独立站服装营销领域经验丰富的卖点文案专家,你需要根据我提供的图片与文本信息,精心打磨出一份极具吸引力的服装卖点文案。这份文案将直接面向欧美市场的消费者,要充分展现出服装的独特价值。文案需涵盖以下三个核心部分:
+#             ## 一、服装描述(中文)
+#                 用精炼的语言,在 100 字以内全方位展示服装的魅力,请只描述服装,不要出现其他品牌名字。
+#             ### 内容构成
+#                     1.面料阐述:若面料材质采用桑蚕丝、二醋酸、三醋酸、香云纱、羊绒、羊毛、美丽诺羊毛、可水洗 / 机洗羊毛、鹅绒、亚麻、棉、羊毛混纺、羊驼毛、马海毛、天丝等材质,优先介绍面料包含怎么样的材质,不然请优先描述细节点和特性。。
+#                     2.细节亮点:包含口袋、弹力橡筋、头花等设计细节,这些小细节往往能体现服装的实用性与独特性。
+#                     3.穿着特性:明确服装适用的季节、场合,以及给出搭配建议,帮助消费者构建穿着场景。
+#             ### 创作要求
+#                 1.引人入胜的开篇:以一句大胆、新颖甚至颠覆常规认知的话语作为开头,瞬间抓住消费者的眼球,并且里面包含衣服的款式。比如 “打破传统的大衣穿搭,这款服装给你全新体验” (尽可能创造新的开头)。
+#                 2.突出独特与品质:深入挖掘服装的面料和设计优势,运用饱含情感的修饰词;使用生动形象的词语。
+#             ### 示例
+#                 绿色代表着生机与活力,这款外套是兼具实用与美观的优雅通勤气质单品。四面弹斜纹布透气性好,细腻的光泽感和适度的垂坠性更显高级。金色的双排扣设计是其显著的特点之一,复古优雅。可以搭配同色系的裤子,展现出自己的个性与魅力。
+#             ## 二、服装描述(英文)
+#                 这部分是中文描述的精准翻译,要求字符数(含标点)不超过 300 个。如果翻译后的内容超过 350 个字符,需要对内容进行合理简化,确保信息完整且语言简洁流畅,符合欧美消费者的阅读习惯。
+#             ### 示例
+#                 Green symbolizes vitality and energy, making this jacket the perfect blend of practicality and elegance for daily commutes. Crafted from breathable four - way stretch twill, its fine sheen and graceful drape exude sophistication. Gold double - breasted buttons that add a touch of vintage charm. Pair it with matching trousers to showcase your unique style and charisma.
+#             ## 三、3 个关键点(英文)
+#                 从服装描述中精准提取三个最具代表性的卖点关键词,每个卖点的表述不超过 4 个单词,力求简洁明了,能够快速传达服装的核心优势。
+#             ### 示例
+#                 -Beathable. -H-line fit. -Casual and work wear
+#             ## 该衣服信息如下:{info}
+#             ## 以下是输出样例的格式给你参考
+#                 - 样例:{{"服装描述(中文)": "作为日常通勤的完美选择,这款四面弹阔腿裤具有出色的弹性和透气性,即使在炎炎夏日也能保持凉爽舒适。弹力腰带、侧口袋和前褶等贴心细节既提升了舒适度,又增强了时尚感。搭配同款夹克,打造别致的造型。", "服装描述(英文)": "The perfect choice for daily commutes, these four-way stretch wide-leg pants offer exceptional elasticity and breathability, keeping you cool and comfortable even on hot summer days. Thoughtful details like an elastic waistband, side pockets, and front pleats enhance both comfort and style. Pair them with a matching jacket for a chic coordinated look.", "3个关键点(英文)": "-Elastic. -With elastic waistband. -With pockets"}}
+#             ## 请严格按照标准的json格式输出结果给我,用英文的双引号而不是单引号!请直接回答结果给我,不要回答我除此之外的任何其他内容!输出不要带有其他格式(如换行加粗等)。
+#     """
+#     return system_prompt
+def get_system_prompt(info):
+    """Build the copywriting prompt with *info* interpolated as the garment information.
+
+    The prompt asks for a JSON object holding a Chinese description, its
+    English translation and three English key points. Doubled braces in the
+    sample are f-string escapes for literal braces.
+    """
+    system_prompt = f"""
+        ## 你现在是一位欧美独立站服装卖点文案的专家;我现在要在独立站上写衣服的卖点,请根据我提供给你的图片和文本写出对应的卖点文案。输出的文案包括3个部分,服装描述(中文)+服装描述(英文)+3个关键点(英文)。 
+        ## 服装描述(中文):以一句大胆的话语作为开头+服装内容的描述(主要是对衣服面料、细节点和特性的描述),其中字数要在100个字以内。
+            - 引人入胜的开篇:以一句大胆、新颖甚至颠覆常规认知的话语作为开头,瞬间抓住消费者的眼球,最好是形容词+品类的形式。并且句子里面一定要包含衣服的品类的词(比如连衣裙,两件套,外套,裤子,半身裙,衬衫等等)。开头不要用反问句!绝对不要包含词语:这件,这款。
+            - 服装内容的描述:内容包括衣服面料+细节点(包括口袋、弹力橡筋、头花等)+特性(包括适用季节、场合、搭配等)。如果面料属于桑蚕丝、二、三醋酸、香云纱、羊绒、羊毛、美丽诺羊毛、可水洗羊毛、可机洗羊毛、鹅绒、亚麻、棉、羊毛混纺、羊驼毛、马海毛、天丝,那请优先描述面料,不然请优先描述细节点和特性。描述时请描述产品的独特设计和材质,加入带有情感的修饰词。用强烈的色彩对比和生动的词语。强调功能性的同时融入时尚表达。强调设计细节的用心。描述产品如何提升穿着者的气场。
+            - 服装描述(中文)例子如下:
+                样例:绿色代表着生机与活力,这款外套是兼具实用与美观的优雅通勤气质单品。四面弹斜纹布透气性好,细腻的光泽感和适度的垂坠性更显高级。金色的双排扣设计是其显著的特点之一,复古优雅。可以搭配同色系的裤子,展现出自己的个性与魅力。
+        ## 服装描述(英文)是服装描述(中文)的翻译,其中字数要在300个字符以内(包括标点符号),如果翻译后文本还是超过350个字符请简化一下内容。
+            - 服装描述(英文)例子如下:
+            - 样例:Green symbolizes vitality and energy, making this jacket the perfect blend of practicality and elegance for daily commutes. Crafted from breathable four-way stretch twill, its fine sheen and graceful drape exude sophistication. Gold double-breasted buttons that add a touch of vintage charm. Pair it with matching trousers to showcase your unique style and charisma.
+        ## 3个关键点(英文)是从服装描述中提取三个卖点关键词,每个卖点不超过4个单词,内容尽可能简洁。
+            - 3个关键点(英文)例子如下:
+            - 样例:-Beathable. -H-line fit. -Casual and work wear
+        ## 该衣服信息如下:{info}
+        ## 以下是输出样例的格式给你参考
+            - 样例:{{"服装描述(中文)": "作为日常通勤的完美选择,这款四面弹阔腿裤具有出色的弹性和透气性,即使在炎炎夏日也能保持凉爽舒适。弹力腰带、侧口袋和前褶等贴心细节既提升了舒适度,又增强了时尚感。搭配同款夹克,打造别致的造型。", "服装描述(英文)": "The perfect choice for daily commutes, these four-way stretch wide-leg pants offer exceptional elasticity and breathability, keeping you cool and comfortable even on hot summer days. Thoughtful details like an elastic waistband, side pockets, and front pleats enhance both comfort and style. Pair them with a matching jacket for a chic coordinated look.", "3个关键点(英文)": "-Elastic. -With elastic waistband. -With pockets"}}
+        ## 请严格按照标准的json格式输出结果给我,用英文的双引号而不是单引号!请直接回答结果给我,不要回答我除此之外的任何其他内容!输出不要带有其他格式(如换行加粗等)。
+    """
+    return system_prompt
+def is_valid_response(response):
+    """
+    Check that *response* is a JSON string containing all required fields.
+
+    Returns False for invalid JSON, non-string input, or a decoded value
+    that cannot be searched for the field names.
+    """
+    required_fields = ["服装描述(中文)", "服装描述(英文)", "3个关键点(英文)"]
+    try:
+        response_dict = json.loads(response)
+        return all(field in response_dict for field in required_fields)
+    # Catch only decoding/type problems; a bare except would also swallow
+    # KeyboardInterrupt and SystemExit.
+    except (TypeError, ValueError):
+        return False
+
+def extract_response_values(response):
+    """
+    Extract the three copy fields from a JSON response string.
+    Args:
+        response: Response text in JSON format.
+    Returns:
+        tuple: (Chinese description, English description, key points);
+        ("", "", "") when the text cannot be decoded or is not an object.
+    """
+    try:
+        response_dict = json.loads(response)
+        cn_desc = response_dict.get("服装描述(中文)", "")
+        en_desc = response_dict.get("服装描述(英文)", "")
+        key_points = response_dict.get("3个关键点(英文)", "")
+        return cn_desc, en_desc, key_points
+    # Narrow handler instead of a bare except: decoding errors, non-string
+    # input, or a decoded value without .get().
+    except (TypeError, ValueError, AttributeError):
+        return "", "", ""
+
+def doubao_request(pre_signed_url_output, info, max_retries=3):
+    """
+    Send the image and prompt to the Doubao vision model, with retries.
+    Args:
+        pre_signed_url_output: Pre-signed URL result of the uploaded image
+            (its ``signed_url`` attribute is sent to the model).
+        info: Product information interpolated into the prompt.
+        max_retries: Maximum number of attempts.
+    Returns:
+        tuple: (Chinese description, English description, key points)
+    Raises:
+        Exception: The last request error, or a generic error when no
+            attempt returned a valid response.
+    """
+    info = get_system_prompt(info)
+    # The key lives on MMMConfig; the bare name "api_key" is not exported by
+    # utils.conf and raised NameError.
+    client = Ark(api_key=MMMConfig.api_key)
+
+    for attempt in range(max_retries):
+        try:
+            t=time.time()
+            print(f"Attempt {attempt + 1}: Sending request...{t}")
+            response = client.chat.completions.create(
+                model="ep-20241202090505-2mncj",
+                messages=[{"role": "user","content": [
+                        {"type": "text", "text": info},
+                        {"type": "image_url", "image_url": {"url": pre_signed_url_output.signed_url}}
+                    ],
+                }],
+                temperature=0.8,
+                extra_headers={"x-ark-beta-vision": "true"}
+            )
+            result = response.choices[0].message.content
+            # This line reports the elapsed time, so say so instead of
+            # repeating the "Sending request" message.
+            print(f"Attempt {attempt + 1}: Response received after {time.time()-t}s")
+            if is_valid_response(result):
+                ch,en,key=extract_response_values(result)
+                # Over-long English copy is condensed by a second model call.
+                if len(en)>350:
+                    en=get_intent(en)
+                return ch,en,key
+
+            print(f"Attempt {attempt + 1}: Invalid response format, retrying...")
+
+        except Exception as e:
+            print(f"Attempt {attempt + 1} failed with error: {str(e)}")
+            # Only the final attempt propagates its error.
+            if attempt == max_retries - 1:
+                raise
+
+    raise Exception("Failed to get valid response after maximum retries")
+
+def doubao_generate(image,info):
+    """Upload *image* to TOS and request selling-point copy for it.
+
+    The image is saved as a temporary JPEG, compressed in place, uploaded
+    and deleted locally; the pre-signed URL is then sent to the model.
+
+    Args:
+        image: PIL image of the product.
+        info: Product information for the prompt.
+
+    Returns:
+        tuple: (Chinese description, English description, key points)
+    """
+    # Working directory and bucket come from MMMConfig; the bare names
+    # "root_path" and "bucket_name" are not exported by utils.conf.
+    root_path = MMMConfig.root_path
+    os.makedirs(root_path, exist_ok=True)
+    file_name=f"{uuid.uuid4()}.jpg"
+    original_file=os.path.join(root_path,file_name)
+
+    # JPEG cannot store alpha or palette data; convert anything that is not
+    # already RGB or greyscale (previously only RGBA was handled).
+    if image.mode not in ('RGB', 'L'):
+        image = image.convert('RGB')
+
+    image.save(original_file)
+    # The compressed image overwrites the temporary file.
+    compressed_file = original_file
+    object_key = file_name
+    compress_image(original_file, compressed_file)
+
+    pre_signed_url_output = upload_tos(compressed_file, MMMConfig.tos_bucket_name, object_key)
+    # The local copy is no longer needed once the upload succeeded.
+    if pre_signed_url_output and os.path.exists(compressed_file):
+        os.remove(compressed_file)
+    return doubao_request(pre_signed_url_output,info)
+
+def generate_text(id,image=None,info="",check=True,model="doubao"):
+    """Generate selling-point copy for a product, enriched from the database.
+
+    Args:
+        id: Product ID; a 9-character ID triggers a database lookup.
+        image: Optional PIL image; the stored image is used when omitted.
+        info: Optional extra product text.
+        check: Unused in this function.
+        model: Backend name; only "doubao" is supported.
+
+    Returns:
+        ``(chinese, english, key_points, image, price, color, ingredient,
+        selling_point, details)``.
+
+    Raises:
+        ValueError: If *model* is not a supported backend.
+    """
+    if len(id) == 9:
+        id_image,id_price, id_color, id_ingredient, id_selling_point, id_details=search_json_files(id)
+
+    else:
+        id_image,id_price, id_color, id_ingredient, id_selling_point, id_details=None,None,None,None,None,None
+
+    info=id_details+info if id_details else info
+    # Identity checks: "== None" on an image object delegates to its __eq__.
+    if image is None and id_image is not None:
+        image=Image.open(id_image)
+    # Fail with a clear message instead of the UnboundLocalError the missing
+    # branch used to cause for any other model name.
+    if model != "doubao":
+        raise ValueError(f"unsupported model: {model}")
+    ch_sen,en_sen,key_point=doubao_generate(image,info)
+    return ch_sen,en_sen,key_point,id_image,id_price, id_color, id_ingredient, id_selling_point, id_details
+    # elif model=="openai":
+    #     return openai_generate(image)
+ 
+
+
+# Manual smoke test: three generations against developer-local sample images.
+if __name__ == "__main__":
+    from PIL import Image
+    img1=Image.open("/data/data/luosy/project/sku_search/temp_img/企业微信截图_17372766091671.png")
+
+    ch_sen,en_sen,key_point,id_image,id_price, id_color, id_ingredient, id_selling_point, id_details=generate_text("",img1,"""-With elastic waistband
+-With hairband
+-X-line fit
+1.腰部橡筋 2.袖子橡
+筋 3.前中绳子可调
+节大小""")
+    print(len(en_sen),end=" ")
+    print(ch_sen,en_sen,key_point)
+    ###############################
+    img2=Image.open("/data/data/luosy/project/sku_search/temp_img/企业微信截图_17389065463149[1](1).png")
+
+    # Pass img2 here: the second and third samples were opened but img1 was
+    # sent for all three calls.
+    ch_sen,en_sen,key_point,id_image,id_price, id_color, id_ingredient, id_selling_point, id_details=generate_text("",img2,"""-Washable wool
+-Unisex
+-With silver threads
+1.后中开衩;2.双扣可调节袖袢;3.暗门筒设计,天然果实扣;4.可水洗羊毛含银葱人字纹面料;5.里面左右两侧均有内袋,左侧最外层内袋是手机袋,防丢失""")
+    print(len(en_sen),end=" ")
+    print(ch_sen,en_sen,key_point)
+    ###############################
+    img3=Image.open("/data/data/luosy/project/sku_search/企业微信截图_17392379937637.png")
+
+    ch_sen,en_sen,key_point,id_image,id_price, id_color, id_ingredient, id_selling_point, id_details=generate_text("",img3,"""-Acetate
+-With pockets
+-Workwear
+1.描述二醋酸面料:2.扣子为镶钻布包扣;3.半裙后腰包橡筋;4.半裙有
+侧插袋;5.半裙有侧开隐形拉链,这是两件套套装""")
+    print(len(en_sen),end=" ")
+    print(ch_sen,en_sen,key_point)

+ 38 - 0
utils/utils/conf.py

@@ -0,0 +1,38 @@
+import os
+
+import tos
+from volcenginesdkarkruntime import Ark
+
+class MMMConfig:
+    target_size = 300 * 1024
+    tos_bucket_name = "ark-auto-2100652488-cn-beijing-default"
+    model="doubao-1-5-vision-pro-32k-250115"
+    api_key = ('817dff39-5586-4f9b-acba-55004167c0b1')
+    client = Ark(api_key=api_key)
+    root_path="/data/data/luosy/project/sku_search/temp_img"
+    ak = ('AKLTMDIxMmQ5NWQ0MDc1NDAzYjhjZWM4YjQ3MjM3ZDUzMjc')
+    sk = ('WTJSbU1UZzVZek5rTldWa05HTTVPVGhqTmpnNE5UWmxaR0prTUdNME56aw==')
+    endpoint, region = "tos-cn-beijing.volces.com", "cn-beijing"
+    tos_client=tos.TosClientV2(ak, sk, endpoint, region)
+    inner_tos_client=tos.TosClientV2(ak, sk, endpoint, region)
+    
+class LMConfig:
+    lm_client = Ark(
+        api_key= "817dff39-5586-4f9b-acba-55004167c0b1",
+        base_url="https://ark.cn-beijing.volces.com/api/v3"
+    )
+    model="ep-20241018084532-cgm84"
+
+
+doubao_ky=("817dff39-5586-4f9b-acba-55004167c0b1","https://ark.cn-beijing.volces.com/api/v3")
+ali_ky=("sk-04b63960983445f980d85ff185a17876","https://dashscope.aliyuncs.com/compatible-mode/v1")
+doubao_model={
+    "text_doubao":"ep-20241018084532-cgm84",
+    "text_ds":"deepseek-r1-250120",
+    "mm_doubao":"doubao-1-5-vision-pro-32k-250115"
+}
+ali_model={
+    "text_ds":"deepseek-r1",
+    "text_dsv3":"deepseek-v3",
+    "text_qwen":"qwen-max-2025-01-25",
+    "mm_tyqw":"qwen-vl-max",
+    "mm_qwen":"qwen-vl-plus"
+}

+ 82 - 0
utils/utils/doubao.py

@@ -0,0 +1,82 @@
+from PIL import Image
+import os
+from  utils.conf import *
+from tos import HttpMethodType
+import time
+
+def get_lm_text(sys_prompt,user_prompt):
+    completion = LMConfig.lm_client.chat.completions.create(
+        messages = [
+            {"role": "system", "content": sys_prompt},
+            {"role": "user", "content": user_prompt},
+        ],
+        model=LMConfig.model,
+    )
+
+    return completion.choices[0].message.content
+
+
+
+
+## 多模态的输入
+def compress_image(input_path, output_path):
+    img = Image.open(input_path)
+    current_size = os.path.getsize(input_path)
+    # 粗略的估计压缩质量,也可以从常量开始,逐步减小压缩质量,直到文件大小小于目标大小
+    image_quality = int(float(MMMConfig.target_size / current_size) * 100)
+    img.save(output_path, optimize=True, quality=int(float(MMMConfig.target_size / current_size) * 100))
+    # 如果压缩后文件大小仍然大于目标大小,则继续压缩
+    # 压缩质量递减,直到文件大小小于目标大小
+    while os.path.getsize(output_path) > MMMConfig.target_size:
+        img = Image.open(output_path)
+        image_quality -= 10
+        if image_quality <= 0:
+            break
+        img.save(output_path, optimize=True, quality=image_quality)
+    return image_quality
+
+def upload_tos(filename,  tos_object_key):
+
+    tos_client, inner_tos_client = MMMConfig.tos_client, MMMConfig.inner_tos_client
+    try:
+        # 将本地文件上传到目标桶中, filename为本地压缩后图片的完整路径
+        tos_client.put_object_from_file(MMMConfig.tos_bucket_name, tos_object_key, filename)
+        # 获取上传后预签名的 url
+        return inner_tos_client.pre_signed_url(HttpMethodType.Http_Method_Get, MMMConfig.tos_bucket_name, tos_object_key)
+    except Exception as e:
+        if isinstance(e, tos.exceptions.TosClientError):
+            # 操作失败,捕获客户端异常,一般情况为非法请求参数或网络异常
+            print('fail with client error, message:{}, cause: {}'.format(e.message, e.cause))
+        elif isinstance(e, tos.exceptions.TosServerError):
+            # 操作失败,捕获服务端异常,可从返回信息中获取详细错误信息
+            print('fail with server error, code: {}'.format(e.code))
+            # request id 可定位具体问题,强烈建议日志中保存
+            print('error with request id: {}'.format(e.request_id))
+            print('error with message: {}'.format(e.message))
+            print('error with http code: {}'.format(e.status_code))
+
+        else:
+
+            print('fail with unknown error: {}'.format(e))
+
+        raise e
+
+
+
+def doubao_MMM_request(pre_signed_url_output, prompt):
+
+    client = MMMConfig.client
+    
+
+    response = client.chat.completions.create(
+        model=MMMConfig.model,
+        messages=[{"role": "user","content": [
+                {"type": "text", "text": prompt},
+                {"type": "image_url", "image_url": {"url": pre_signed_url_output.signed_url}}
+            ],
+        }],
+        temperature=0.8,
+        extra_headers={"x-ark-beta-vision": "true"}
+    )
+    result = response.choices[0].message.content
+    return result

+ 197 - 0
utils/utils/llm.py

@@ -0,0 +1,197 @@
+import io
+from PIL import Image
+import os
+
+import numpy as np
+from openai import OpenAI
+from  utils.conf import *
+from tos import HttpMethodType
+import time
+from openai import OpenAI
+import os
+import base64
+from PIL import Image
+
+def image_to_base64(image):
+    # 将Image对象转换为BytesIO对象
+    image_io = io.BytesIO()
+    image.save(image_io, format='PNG')
+    image_io.seek(0)
+
+    # 使用base64编码
+    image_base64 = base64.b64encode(image_io.read()).decode('utf-8')
+
+    return f"data:image/png;base64,{image_base64}"
+
+def image_reader(image):
+    """图片读取器,输出PIL.Image格式的图片"""
+    if isinstance(image,str):
+        if image.startswith("http"):
+            return image
+        else:
+            image_path = image
+        out_image = Image.open(image_path)
+    elif isinstance(image,np.ndarray):
+        out_image = Image.fromarray(image)
+    else:
+        out_image = image
+    out_image=out_image.convert('RGB')
+    base64_img=image_to_base64(out_image)
+    return base64_img
+def get_lm_text(sys_prompt,user_prompt):
+    completion = LMConfig.lm_client.chat.completions.create(
+        messages = [
+            {"role": "system", "content": sys_prompt},
+            {"role": "user", "content": user_prompt},
+        ],
+        model=LMConfig.model,
+    )
+
+    return completion.choices[0].message.content
+
+
+
+
+## 多模态的输入
+def compress_image(input_path, output_path):
+    img = Image.open(input_path)
+    current_size = os.path.getsize(input_path)
+    # 粗略的估计压缩质量,也可以从常量开始,逐步减小压缩质量,直到文件大小小于目标大小
+    image_quality = int(float(MMMConfig.target_size / current_size) * 100)
+    img.save(output_path, optimize=True, quality=int(float(MMMConfig.target_size / current_size) * 100))
+    # 如果压缩后文件大小仍然大于目标大小,则继续压缩
+    # 压缩质量递减,直到文件大小小于目标大小
+    while os.path.getsize(output_path) > MMMConfig.target_size:
+        img = Image.open(output_path)
+        image_quality -= 10
+        if image_quality <= 0:
+            break
+        img.save(output_path, optimize=True, quality=image_quality)
+    return image_quality
+
+def upload_tos(filename,  tos_object_key):
+
+    tos_client, inner_tos_client = MMMConfig.tos_client, MMMConfig.inner_tos_client
+    try:
+        # 将本地文件上传到目标桶中, filename为本地压缩后图片的完整路径
+        tos_client.put_object_from_file(MMMConfig.tos_bucket_name, tos_object_key, filename)
+        # 获取上传后预签名的 url
+        return inner_tos_client.pre_signed_url(HttpMethodType.Http_Method_Get, MMMConfig.tos_bucket_name, tos_object_key)
+    except Exception as e:
+        if isinstance(e, tos.exceptions.TosClientError):
+            # 操作失败,捕获客户端异常,一般情况为非法请求参数或网络异常
+            print('fail with client error, message:{}, cause: {}'.format(e.message, e.cause))
+        elif isinstance(e, tos.exceptions.TosServerError):
+            # 操作失败,捕获服务端异常,可从返回信息中获取详细错误信息
+            print('fail with server error, code: {}'.format(e.code))
+            # request id 可定位具体问题,强烈建议日志中保存
+            print('error with request id: {}'.format(e.request_id))
+            print('error with message: {}'.format(e.message))
+            print('error with http code: {}'.format(e.status_code))
+
+        else:
+
+            print('fail with unknown error: {}'.format(e))
+
+        raise e
+
+
+
+def doubao_MMM_request(pre_signed_url_output, prompt):
+
+    client = MMMConfig.client
+    
+
+    response = client.chat.completions.create(
+        model=MMMConfig.model,
+        messages=[{"role": "user","content": [
+                {"type": "text", "text": prompt},
+                {"type": "image_url", "image_url": {"url": pre_signed_url_output.signed_url}}
+            ],
+        }],
+        temperature=0.8,
+        extra_headers={"x-ark-beta-vision": "true"}
+    )
+    result = response.choices[0].message.content
+    return result
+
+
+class llm_request:
+    def __init__(self,api_key,base_url,model) -> None:
+        self.api_key=api_key
+        self.base_url=base_url
+        self.model=model
+
+
+    def llm_mm_request(self,usr_text,img,sys_text="You are a helpful assistant."):
+        client = OpenAI(
+        # 若没有配置环境变量,请用百炼API Key将下行替换为:api_key="sk-xxx"
+        api_key=self.api_key,
+        base_url=self.base_url
+    )
+        completion = client.chat.completions.create(
+            model=self.model,#
+            messages=[
+                {
+                    "role": "system",
+                    "content": [{"type":"text","text": sys_text}]},
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "image_url",
+                            # 需要注意,传入Base64,图像格式(即image/{format})需要与支持的图片列表中的Content Type保持一致。"f"是字符串格式化的方法。
+                            # PNG图像:  f"data:image/png;base64,{base64_image}"
+                            # JPEG图像: f"data:image/jpeg;base64,{base64_image}"
+                            # WEBP图像: f"data:image/webp;base64,{base64_image}"
+                            "image_url": {"url": image_reader(img)}, 
+                        },
+                        {"type": "text", "text": usr_text},
+                    ],
+                }
+
+            ],
+            temperature=1.5,
+            top_p=0.85, 
+            presence_penalty=1.5, 
+            frequency_penalty=1.5,
+        )
+        return completion.choices[0].message.content
+    
+    def llm_text_request(self,text,sys_text="You are a helpful assistant."):
+        client = OpenAI(
+        # 若没有配置环境变量,请用百炼API Key将下行替换为:api_key="sk-xxx"
+        api_key=self.api_key,
+        base_url=self.base_url
+    )
+        completion = client.chat.completions.create(
+            model=self.model,#
+            messages=[
+                {
+                    "role": "system",
+                    "content": sys_text},
+                {
+                    "role": "user",
+                    "content":  text,
+                }
+            ],
+            temperature=0.9,
+        )
+        return completion.choices[0].message.content
+    
+
+
+if __name__=="__main__":
+    ##ali
+    ky="sk-04b63960983445f980d85ff185a17876"
+    baseurl="https://dashscope.aliyuncs.com/compatible-mode/v1"
+    model="qwen-vl-max-latest"
+    ##doubao
+    # ky='817dff39-5586-4f9b-acba-55004167c0b1'
+    # baseurl="https://ark.cn-beijing.volces.com/api/v3"
+    # model="doubao-1-5-vision-pro-32k-250115"
+    llm=llm_request(ky,baseurl,model)
+    res1=llm.llm_mm_request("描述一下图片中的衣服","/data/data/Mia/product_env_project/gen_sellpoint/企业微信截图_17372766091671.png")
+    print(res1)
+    res2=llm.llm_text_request("你好!你是谁")
+    print(res2)

+ 29 - 0
utils/utils/module.py

@@ -0,0 +1,29 @@
+import json
+import pandas as pd
+
+def save_json(file_path, data):
+    with open(file_path, 'w', encoding='utf-8') as json_file:
+        json.dump(data, json_file, ensure_ascii=False, indent=4)
+
+def update_excel_from_json(json_file_path, excel_file_path):
+    # 读取 JSON 文件
+    with open(json_file_path, 'r', encoding='utf-8') as json_file:
+        json_data = json.load(json_file)
+
+    # 读取 Excel 文件
+    df = pd.read_excel(excel_file_path)
+    item = json_data
+
+    product_code = item.get("货号")  # 获取“货号”
+    # 查找所有匹配的行索引
+    matching_rows = df[df['商品编码'] == product_code].index  # 获取所有匹配行号
+    if not matching_rows.empty:  # 检查是否有匹配的行
+        for row_index in matching_rows:  # 遍历所有匹配的行
+            # 将其余字段的值写入到对应行的末尾
+            for key, value in item.items():
+                if key != "货号" and key != "色号" and key != "价格" and key != "成分":  # 排除“货号”字段
+                    df.at[row_index, key] = value
+
+    # 保存更新后的 Excel 文件
+    df.to_excel(excel_file_path, index=False)
+    print("数据已成功更新到 Excel 文件。")

+ 18 - 0
utils/utils/pdf_extract.py

@@ -0,0 +1,18 @@
+import fitz  
+
+def extract_text(file_path, page_number):
+    # 打开PDF文件
+    doc = fitz.open(file_path)
+    
+    # 检查页面号是否有效
+    if page_number < 1 or page_number > len(doc):
+        return "页面号超出文档范围"
+    
+    # 提取指定页面的文本(页面索引从0开始,因此减1)
+    page = doc[page_number - 1]
+    text = page.get_text()
+    
+    # 关闭文档
+    doc.close()
+    
+    return text

+ 35 - 0
utils/utils/process_meta.py

@@ -0,0 +1,35 @@
+import os
+import json
+
+def add_image(json_file_path, image_url):
+    """添加商品图像字段到指定的JSON文件"""
+    # 读取JSON文件
+    with open(json_file_path, 'r', encoding='utf-8') as file:
+        data = json.load(file)
+
+    # 添加“商品图像”字段
+    data['商品图像'] = image_url
+
+    # 将更新后的数据写回到JSON文件
+    with open(json_file_path, 'w', encoding='utf-8') as file:
+        json.dump(data, file, ensure_ascii=False, indent=4)
+
+    print("已成功添加“商品图像”字段。")
+
+if __name__ == "__main__":
+
+    json_folder = "./database/meta"
+    image_folder = "./database/image"
+
+    for filename in os.listdir(json_folder):
+        json_file_path = os.path.join(json_folder, filename)
+        image_url = os.path.join(image_folder, filename.replace("png"))
+
+        if os.path.exists(image_url):
+            print(f"正在处理:{json_file_path}")
+            # add_image(json_file_path, image_url)
+        else:
+            print(f"图片文件不存在:{image_url}")
+
+
+

+ 151 - 0
utils/utils/prompt.py

@@ -0,0 +1,151 @@
+## 中文变英文卖点文案
+def get_ch_en_selling_points(info):
+    ch_en_selling_points = f"""
+        ## 你现在是一位欧美独立站服装卖点文案的专家;我现在要在独立站上写衣服的卖点,请根据我提供给你的图片和文本写出对应的卖点文案。输出的文案包括3个部分,服装描述(中文)+服装描述(英文)+3个关键点(英文)。 
+        ## 服装描述(中文):以一句大胆的话语作为开头+服装内容的描述(主要是对衣服面料、细节点和特性的描述),其中字数要在100个字以内。
+            - 引人入胜的开篇:以一句大胆、新颖甚至颠覆常规认知的话语作为开头,瞬间抓住消费者的眼球,最好是形容词+品类的形式。并且句子里面一定要包含衣服的品类的词(比如连衣裙,两件套,外套,裤子,半身裙,衬衫等等)。开头不要用反问句!绝对不要包含词语:这件,这款。
+            - 服装内容的描述:内容包括衣服面料+细节点(包括口袋、弹力橡筋、头花等)+特性(包括适用季节、场合、搭配等)。如果面料属于桑蚕丝、二、三醋酸、香云纱、羊绒、羊毛、美丽诺羊毛、可水洗羊毛、可机洗羊毛、鹅绒、亚麻、棉、羊毛混纺、羊驼毛、马海毛、天丝,那请优先描述面料的材料(很重要)!不然请优先描述细节点和特性。描述时请描述产品的独特设计和材质,加入带有情感的修饰词。用强烈的色彩对比和生动的词语。强调功能性的同时融入时尚表达。强调设计细节的用心。描述产品如何提升穿着者的气场。
+            - 服装描述(中文)例子如下:
+                样例:充满生机与活力的外套,这款外套是兼具实用与美观的优雅通勤气质单品。四面弹斜纹布透气性好,细腻的光泽感和适度的垂坠性更显高级。金色的双排扣设计是其显著的特点之一,复古优雅。可以搭配同色系的裤子,展现出自己的个性与魅力。
+        ## 服装描述(英文)是服装描述(中文)的翻译,其中字数要在300个字符以内(包括标点符号),如果翻译后文本还是超过350个字符请简化一下内容。
+            - 服装描述(英文)例子如下:
+            - 样例:Green symbolizes vitality and energy, making this jacket the perfect blend of practicality and elegance for daily commutes. Crafted from breathable four-way stretch twill, its fine sheen and graceful drape exude sophistication. Gold double-breasted buttons that add a touch of vintage charm. Pair it with matching trousers to showcase your unique style and charisma.
+        ## 3个关键点(英文)是从服装描述中提取三个卖点关键词,每个卖点不超过4个单词,内容尽可能简洁。
+            - 3个关键点(英文)例子如下:
+            - 样例:-Beathable. -H-line fit. -Casual and work wear
+        ## 该衣服信息如下:{info}
+        ## 以下是输出样例的格式给你参考
+            - 样例:{{"服装描述(中文)": "作为日常通勤的完美选择,这款四面弹阔腿裤具有出色的弹性和透气性,即使在炎炎夏日也能保持凉爽舒适。弹力腰带、侧口袋和前褶等贴心细节既提升了舒适度,又增强了时尚感。搭配同款夹克,打造别致的造型。", "服装描述(英文)": "The perfect choice for daily commutes, these four-way stretch wide-leg pants offer exceptional elasticity and breathability, keeping you cool and comfortable even on hot summer days. Thoughtful details like an elastic waistband, side pockets, and front pleats enhance both comfort and style. Pair them with a matching jacket for a chic coordinated look.", "3个关键点(英文)": "-Elastic. -With elastic waistband. -With pockets"}}
+        ## 请严格按照标准的json格式输出结果给我,用英文的双引号而不是单引号!请直接回答结果给我,不要回答我除此之外的任何其他内容!输出不要带有其他格式(如换行加粗等)。
+    """
+    return ch_en_selling_points
+
+
+## 英文变中文卖点文案
+def get_en_ch_selling_points(info):
+    en_ch_selling_points = f"""
+        ## 你现在是一位欧美独立站服装卖点文案的专家;我现在要在独立站上写衣服的卖点,请根据我提供给你的图片和文本写出对应的卖点文案。输出的文案包括3个部分,服装描述(英文)+服装描述(中文)+3个关键点(英文)。 
+        ## 要求:请为衣服注入灵魂,衣服就像人一样,她/他的设计都是有初衷的,有自己的实用性和时尚性,请描述衣服的卖点的时候,像是在与消费者直接对话,营造出一种亲切、友好的氛围。且运用时尚且专业的词汇,通过细腻的描述来突出产品的特点和优势。巧妙地将产品与具体的生活场景相结合。整体表述简洁,没有过多冗长复杂的句子,信息传达直接高效,能够让读者快速抓住重点,了解产品的主要特点、优势和适用场景。
+        ## 内容如下:
+            ### 服装描述(英文):引人入胜的开篇+服装内容的描述(主要是对衣服面料、细节点和特性的描述),其中字数要在300个字符以内(包括标点符号)。
+                - 引人入胜的开篇:用新颖含有故事性的话语作为开头,瞬间抓住消费者的眼球。为衣服注入灵魂。衣服是有生命力的,不要用it表述衣服,而是拟人化。使用 “you” 直接与读者交流,亲切且对话感强,运用形象的比喻和富有情感的描述。不要老是以Imagine slipping into作为开头
+                - 服装内容的描述:内容包括衣服面料+细节点(包括口袋、弹力橡筋、头花等)+特性(包括适用季节、场合、搭配等)。如果面料属于桑蚕丝、二、三醋酸、香云纱、羊绒、羊毛、美丽诺羊毛、可水洗羊毛、可机洗羊毛、鹅绒、亚麻、棉、羊毛混纺、羊驼毛、马海毛、天丝,那请优先描述面料的材料(很重要)!不然请优先描述细节点和特性。描述时请描述产品的独特设计和材质,加入带有情感的修饰词。用强烈的色彩对比和生动的词语。强调功能性的同时融入时尚表达。强调设计细节的用心。描述产品如何提升穿着者的气场。
+                - 服装描述(英文)例子如下:
+                    样例1:"When you've got no time to waste, wrap up your look in seconds with this sleek vest that lets you face the day head-on. Made from crease-ease fabric, this low-maintenance charmer features a collared neckline and a detachable sash that keeps things looking sharp and always ready."
+                    样例2:"Like a familiar hug from Ahma, Farah is made of soft knit that gently wraps your curves and will continue to do so without losing shape. With a fixed sash for an added layer of style, she’s an essential that works under blazers or on her own."
+                    样例3:"Throw Riley on or layer her, and you’ll see why you need her in every colour. We one-upped ourselves and made her with a more refined finish, so you can look every bit the sharp shooter you already are. Comes with a functional front pocket for stashing your essentials."
+                    样例4:"For whatever the weekend brings, this skort's built-in shorts mean you can conquer the day without worrying about your bottoms riding up or sliding down. Plus, we designed her with a detachable belt that chinches your waist so good, you'll want to wear it forever. Comes with functional side pockets and closes with a back zip."
+                    样例5:"Consider Dahlia your fool-proof office look. A classic silhouette, but made better with a back zip, edgier angular lines, and a flattering square neck that’s appropriately feminine, yet modern. You’ll look like a 10/10 on that pitch, so go win it."
+            ### 服装描述(中文)是服装描述(英文)的翻译。
+            ### 3个关键点(英文)是从服装描述中提取三个卖点关键词,每个卖点不超过4个单词,内容尽可能简洁。
+                - 3个关键点(英文)例子如下:
+                - 样例:-Beathable. -H-line fit. -Casual and work wear
+        ## 该衣服信息如下:{info}
+        ## 以下是输出样例的格式给你参考
+            - 样例:{{"服装描述(英文)": "Some skirts are meant for you to dance in—Wei’s one of them. Featuring sunray pleats crafted from lightweight crepe, this midaxi skirt flows with ease while her fully elastic waistband ensures maximum comfort. And the best part? She’s machine washable, so you can wear her on repeat. No laundry or dry cleaning required", "服装描述(中文)":"有些裙子是专为让你尽情跳舞而设计的,薇这款裙子就是其中之一。这款中长款迷笛裙采用轻盈的绉纱面料打造出放射状褶裥,能轻松飘逸摆动,而全松紧腰头则确保了极致的舒适感。最棒的是什么呢?这条裙子可机洗,所以你可以反复穿着。无需手洗或干洗。","3个关键点(英文)": "-Bloat-friendly. -Bump-friendly. -Machine Washable"}}
+        ## 请严格按照标准的json格式输出结果给我,用英文的双引号而不是单引号!请直接回答结果给我,不要回答我除此之外的任何其他内容!输出不要带有其他格式(如换行加粗等)。
+    """
+    return en_ch_selling_points          
+
+def get_en_ch_selling_points_his(info,history):
+    en_ch_selling_points = f"""
+        ## 你现在是一位欧美独立站服装卖点文案的专家;我现在要在独立站上写衣服的卖点,请根据我提供给你的图片和文本写出对应的卖点文案。输出的文案包括3个部分,服装描述(英文)+服装描述(中文)+3个关键点(英文)。 
+        ## 要求:请为衣服注入灵魂,衣服就像人一样,她/他的设计都是有初衷的,有自己的实用性和时尚性,请描述衣服的卖点的时候,像是在与消费者直接对话,营造出一种亲切、友好的氛围。且运用时尚且专业的词汇,通过细腻的描述来突出产品的特点和优势。巧妙地将产品与具体的生活场景相结合。整体表述简洁,没有过多冗长复杂的句子,信息传达直接高效,能够让读者快速抓住重点,了解产品的主要特点、优势和适用场景。
+        ## 内容如下:
+            ### 服装描述(英文):引人入胜的开篇+服装内容的描述(主要是对衣服面料、细节点和特性的描述),其中字数要在300个字符以内(包括标点符号)内容里面不得包含款号和价格信息!!!要用英文描述,不能夹杂着中文!!!。
+
+                - 引人入胜的开篇:用新颖含有故事性的话语作为开头,瞬间抓住消费者的眼球。为衣服注入灵魂。衣服是有生命力的,不要用it表述衣服,而是拟人化。使用 “you” 直接与读者交流,亲切且对话感强,运用形象的比喻和富有情感的描述。不要老是以Imagine slipping into或者Embrace或者Discovery作为开头
+                    注意!!以下是常用的开篇,请不要再重复使用或者出现相同的词汇(如果使用重复的词汇则判断为类似),请生成比下面更加新颖的开头:
+                        {history}
+                - 服装内容的描述:内容包括衣服面料+细节点(包括口袋、弹力橡筋、头花等)+特性(包括适用季节、场合、搭配等)。如果面料属于桑蚕丝、二、三醋酸、香云纱、羊绒、羊毛、美丽诺羊毛、可水洗羊毛、可机洗羊毛、鹅绒、亚麻、棉、羊毛混纺、羊驼毛、马海毛、天丝,那请优先描述面料的材料(很重要)!不然请优先描述细节点和特性。描述时请描述产品的独特设计和材质,加入带有情感的修饰词。用强烈的色彩对比和生动的词语。强调功能性的同时融入时尚表达。强调设计细节的用心。描述产品如何提升穿着者的气场。
+                - 服装描述(英文)例子如下,请重点参考下面的样例的结构和语言风格进行生成卖点文案,但是不能输出一样的句子:
+                    样例1:"When you've got no time to waste, wrap up your look in seconds with this sleek vest that lets you face the day head-on. Made from crease-ease fabric, this low-maintenance charmer features a collared neckline and a detachable sash that keeps things looking sharp and always ready."
+                    样例2:"Like a familiar hug from Ahma, Farah is made of soft knit that gently wraps your curves and will continue to do so without losing shape. With a fixed sash for an added layer of style, she’s an essential that works under blazers or on her own."
+                    样例3:"Throw Riley on or layer her, and you’ll see why you need her in every colour. We one-upped ourselves and made her with a more refined finish, so you can look every bit the sharp shooter you already are. Comes with a functional front pocket for stashing your essentials."
+                    样例4:"For whatever the weekend brings, this skort's built-in shorts mean you can conquer the day without worrying about your bottoms riding up or sliding down. Plus, we designed her with a detachable belt that chinches your waist so good, you'll want to wear it forever. Comes with functional side pockets and closes with a back zip."
+                    样例5:"Consider Dahlia your fool-proof office look. A classic silhouette, but made better with a back zip, edgier angular lines, and a flattering square neck that’s appropriately feminine, yet modern. You’ll look like a 10/10 on that pitch, so go win it."
+                    样例6:"You balance it all — and even if you do break a sweat doing it, we promise Rachel will make you look like you have it all together. For the go-getter that you are, she comes with front and inner pockets, and an elongated silhouette that sits well everything on your weekday calendar."
+                    样例7:"Amiera’s birdseye knit and spongey softness give you that cosy, double-faced wool look without any of the heaviness. And, with her relaxed fit and minimal design, this knit cardigan is as plush as it is practical—like bringing your duvet to brunch."
+                    样例8:"You can’t quite top Joey for her versatility. She's the one to call for your everyday and in-betweens, and when you need a piece of clothing to count on for extra coverage under your layers. Also, she comes with removable bust paddings that are shaped better. How titillating."
+            ### 服装描述(中文)是服装描述(英文)翻译成中文。
+            ### 3个关键点(英文)是从服装描述中提取三个卖点关键词,每个卖点不超过4个单词,内容尽可能简洁。
+                - 3个关键点(英文)例子如下:
+                - 样例:-Beathable. -H-line fit. -Casual and work wear
+        ## 该衣服信息如下:{info}
+        ## 以下是输出样例的格式给你参考,请参考该格式的key输出字典给我:
+            - 样例:{{"服装描述(英文)": "Some skirts are meant for you to dance in—Wei’s one of them. Featuring sunray pleats crafted from lightweight crepe, this midaxi skirt flows with ease while her fully elastic waistband ensures maximum comfort. And the best part? She’s machine washable, so you can wear her on repeat. No laundry or dry cleaning required", "服装描述(中文)":"有些裙子是专为让你尽情跳舞而设计的,薇这款裙子就是其中之一。这款中长款迷笛裙采用轻盈的绉纱面料打造出放射状褶裥,能轻松飘逸摆动,而全松紧腰头则确保了极致的舒适感。最棒的是什么呢?这条裙子可机洗,所以你可以反复穿着。无需手洗或干洗。","3个关键点(英文)": "-Bloat-friendly. -Bump-friendly. -Machine Washable"}}
+        ## 请严格按照标准的字典格式输出结果给我,用英文的双引号而不是单引号!请直接回答结果给我,不要回答我除此之外的任何其他内容!输出不要带有其他格式(如换行加粗等)。
+    """
+    return en_ch_selling_points  
+        
+## 文本简化控制字数
+
+def TextControl_his(info,his):
+    sys_prompt="你是一个服装商品详情描述美化专家,请对用户输入的英文卖点进行美化,并返回结果"
+    usr_prompt = f"""
+        ## 你现在是一位欧美独立站服装卖点文案的专家;我现在要在独立站上写衣服的卖点,请根据我提供给你的图片和文本写出对应的卖点文案。输出的文案包括3个部分,服装描述(英文)+服装描述(中文)+3个关键点(英文)。 
+        ## 要求:请为衣服注入灵魂,衣服就像人一样,她/他的设计都是有初衷的,有自己的实用性和时尚性,请描述衣服的卖点的时候,像是在与消费者直接对话,营造出一种亲切、友好的氛围。且运用时尚且专业的词汇,通过细腻的描述来突出产品的特点和优势。巧妙地将产品与具体的生活场景相结合。整体表述简洁,没有过多冗长复杂的句子,信息传达直接高效,能够让读者快速抓住重点,了解产品的主要特点、优势和适用场景。
+        ## 内容如下:
+            ### 服装描述(英文):引人入胜的开篇+服装内容的描述(主要是对衣服面料、细节点和特性的描述),其中字数要在300个字符以内(包括标点符号)。
+
+                - 引人入胜的开篇:用新颖含有故事性的话语作为开头,瞬间抓住消费者的眼球。为衣服注入灵魂。衣服是有生命力的,不要用it表述衣服,而是拟人化。使用 “you” 直接与读者交流,亲切且对话感强,运用形象的比喻和富有情感的描述。不要老是以Imagine slipping into或者Embrace或者Discovery作为开头
+                    注意!!以下是常用的开篇,请不要再重复使用或者出现相同的词汇(如果使用重复的词汇则判断为类似),请生成比下面更加新颖的开头:
+                        {his}
+                - 服装内容的描述:内容包括衣服面料+细节点(包括口袋、弹力橡筋、头花等)+特性(包括适用季节、场合、搭配等)。如果面料属于桑蚕丝、二、三醋酸、香云纱、羊绒、羊毛、美丽诺羊毛、可水洗羊毛、可机洗羊毛、鹅绒、亚麻、棉、羊毛混纺、羊驼毛、马海毛、天丝,那请优先描述面料的材料(很重要)!不然请优先描述细节点和特性。描述时请描述产品的独特设计和材质,加入带有情感的修饰词。用强烈的色彩对比和生动的词语。强调功能性的同时融入时尚表达。强调设计细节的用心。描述产品如何提升穿着者的气场。
+                - 服装描述(英文)例子如下,请重点参考下面的样例的结构和语言风格进行生成卖点文案,但是不能输出一样的句子:
+                    样例1:"When you've got no time to waste, wrap up your look in seconds with this sleek vest that lets you face the day head-on. Made from crease-ease fabric, this low-maintenance charmer features a collared neckline and a detachable sash that keeps things looking sharp and always ready."
+                    样例2:"Like a familiar hug from Ahma, Farah is made of soft knit that gently wraps your curves and will continue to do so without losing shape. With a fixed sash for an added layer of style, she’s an essential that works under blazers or on her own."
+                    样例3:"Throw Riley on or layer her, and you’ll see why you need her in every colour. We one-upped ourselves and made her with a more refined finish, so you can look every bit the sharp shooter you already are. Comes with a functional front pocket for stashing your essentials."
+                    样例4:"For whatever the weekend brings, this skort's built-in shorts mean you can conquer the day without worrying about your bottoms riding up or sliding down. Plus, we designed her with a detachable belt that chinches your waist so good, you'll want to wear it forever. Comes with functional side pockets and closes with a back zip."
+                    样例5:"Consider Dahlia your fool-proof office look. A classic silhouette, but made better with a back zip, edgier angular lines, and a flattering square neck that’s appropriately feminine, yet modern. You’ll look like a 10/10 on that pitch, so go win it."
+                    样例6:"You balance it all — and even if you do break a sweat doing it, we promise Rachel will make you look like you have it all together. For the go-getter that you are, she comes with front and inner pockets, and an elongated silhouette that sits well everything on your weekday calendar."
+                    样例7:"Amiera’s birdseye knit and spongey softness give you that cosy, double-faced wool look without any of the heaviness. And, with her relaxed fit and minimal design, this knit cardigan is as plush as it is practical—like bringing your duvet to brunch."
+                    样例8:"You can’t quite top Joey for her versatility. She's the one to call for your everyday and in-betweens, and when you need a piece of clothing to count on for extra coverage under your layers. Also, she comes with removable bust paddings that are shaped better. How titillating."
+            ### 服装描述(中文)是服装描述(英文)的翻译。
+            ### 3个关键点(英文)是从服装描述中提取三个卖点关键词,每个卖点不超过4个单词,内容尽可能简洁。
+                - 3个关键点(英文)例子如下:
+                - 样例:-Beathable. -H-line fit. -Casual and work wear
+        ## 该衣服信息如下:{info}
+        ## 以下是输出样例的格式给你参考
+            - 样例:{{"服装描述(英文)": "Some skirts are meant for you to dance in—Wei’s one of them. Featuring sunray pleats crafted from lightweight crepe, this midaxi skirt flows with ease while her fully elastic waistband ensures maximum comfort. And the best part? She’s machine washable, so you can wear her on repeat. No laundry or dry cleaning required", "服装描述(中文)":"有些裙子是专为让你尽情跳舞而设计的,薇这款裙子就是其中之一。这款中长款迷笛裙采用轻盈的绉纱面料打造出放射状褶裥,能轻松飘逸摆动,而全松紧腰头则确保了极致的舒适感。最棒的是什么呢?这条裙子可机洗,所以你可以反复穿着。无需手洗或干洗。","3个关键点(英文)": "-Bloat-friendly. -Bump-friendly. -Machine Washable"}}
+        ## 请严格按照标准的json格式输出结果给我,用英文的双引号而不是单引号!请直接回答结果给我,不要回答我除此之外的任何其他内容!输出不要带有其他格式(如换行加粗等)。
+    """
+    return sys_prompt,usr_prompt
+
+def TextControl(info):
+    sys_prompt="你是一个文案总结高手,请对用户输入的文案进行简化,并返回结果,不要对内容进行拓展"
+    usr_prompt=f"""请对下面的英文进行缩短简化,简化后的英文字符数量必须在150-200之间(包括标点符号,请记住不是单词数量,是每个字符数)。请直接返回结果,不要输出其他内容。内容如下:{info}"""
+    return sys_prompt,usr_prompt
+
+
+
+
+
+
+def get_ch_en_selling_title(info):
+    #对这个标题进行翻译
+    sys_prompt="你是一个产品标题专家,请对用户输入的服装信息写标题"
+    usr_prompt=f"""## 我现在要在独立站上写衣服商品的标题,请根据我提供给你的衣服英文商品详情描述提取出对应的信息并写出对应的英文卖点标题。输出的标题包括2个部分,标题(英文)和把标题翻译成中文后的结果。标题单词数:7—10个单词,不能超过这个数量,要是超过请重新思考后再生成。
+        ## 标题框架:卖点+Women/Unisex+二级品类。请严格按照这个框架写卖点的英文标题,语句不需要通顺,只需要从服装商品描述中提取相关关键信息进行组合。
+            ### 卖点内容包括面料、颜色印花和版型设计,其中面料是最重要的信息,其次才是颜色印花和版型设计
+                - 面料:属于香云纱、羊绒、羊毛、羊毛混纺、牦牛绒、骆驼绒、粗花呢、亮片、牛仔、真丝、天丝、醋酸、三醋酸、100%棉、仿麂皮、Pu 革等,如果服装商品描述中包含这些面料,那请优先描述面料的材料(很重要)!
+                - 颜色印花:提花、刺绣、各式印花花色、条纹、渐变色等,这些是很关键的信息
+                - 版型设计:圆领、V领、荷叶边领口、荷叶边下摆、一字肩、露肩、单排扣、双排扣、收腰、高腰、可调节纽扣、抽绳、拼接、撞色边、撞色领、侧边开视、镂空、一片式设计等。
+            ### Women/Unisex:要根据商品详情描述判断是Women还是Unisex
+            ### 二级品类指的是衣服的款式,例如裙子,长筒裤,大衣等。
+        ## 我现在给出的衣服英文商品详情描述信息如下:{info}
+        ## 输出样例的格式如下:
+            - {{"en_tile":"标题(英文)","ch_title":"把标题翻译成中文后的结果"}}
+        ## 以下是一些样例给你参考
+            ### 样例1:
+                - 衣服英文商品详情描述:Versatile and stylish, this shirt is crafted from 10MM mulberry silk, offering a breathable and luxurious wearing experience. The elastic cuffs create a charming ruched effect, adding a touch of delicacy. With its 3/4 sleeves, it's perfect for layering over dresses or tops,combining style and comfort seamlessly, when for a casual day out or a semi-formal event."
+                - 输出结果:{{"en_tile":"10MM Mulberry Silk 3/4 Sleeve Women Front Knot Shirt","ch_title":"10 毫米桑蚕丝材质、3/4 袖长、前襟系扣的女士衬衫"}}
+            ### 样例2:
+                - 衣服英文商品详情描述:A dress that exudes full-on confidence! Made of skin-friendly cotton fabric.The polo collar and unbuttonable buttons make it easy to put on and take off With pockets, a leather belt, gathered waist, and a large-volume flared hem, it's a standout piece for the workplace."
+                - 输出结果:{{"en_tile":"Gathered Waist Cap Sleeve Women Maxi Dress With Leather Belt","ch_title":"配有皮革腰带、腰部抽褶、短袖(灯笼袖)的女士长款连衣裙"}}
+            ### 样例3:
+                - 衣服英文商品详情描述:A stylish twist on classic shorts! These A-line women's shorts are made from <b>four-way stretch twill fabric</b> for comfort and quality. <b>The elastic waistband</b> offers a great fit, flattering all body types. <b>With practical side pockets,</b> they're perfect for casual gatherings or everyday wear, effortlessly showcasing your unique charm."
+                - 输出结果:{{"en_tile":"Stretch Elastic Waist A-Line Women Shorts","ch_title":"弹性松紧腰的女士 A 字短裤"}}
+        ## 请严格按照标准的json格式输出结果给我,用英文的双引号而不是单引号!请直接回答结果给我,不要回答我除此之外的任何其他内容!输出不要带有其他格式(如换行加粗等)。
+    """
+    return sys_prompt,usr_prompt

+ 33 - 0
utils/utils/text_parser.py

@@ -0,0 +1,33 @@
+from volcenginesdkarkruntime import Ark
+
+client = Ark(
+    api_key= "817dff39-5586-4f9b-acba-55004167c0b1",
+    base_url="https://ark.cn-beijing.volces.com/api/v3"
+)
+
+system_prompt = """
+##角色:你是一个强大的文本解析器,能够正确理解文本信息并准确提取其中的关键信息。同时你也是一个专业的服装行业从业者。
+
+##任务:请根据用户提供的文本,提取出以下信息,并以json格式返回:
+{
+    "货号": "xxx",//提取货号信息,也即款号信息
+    "色号": "xxx",//提取色号信息,也即颜色信息,通常由数字字母和中文组成,例如"61Y浅驼",如果没有颜色信息,请返回空
+    "价格": "xxx",//提取价格信息,也即售价信息,如果没有售价信息,请返回空
+    "成分": "xxx",//提取成分信息,也即材质信息,包括材质和成分,填充物、面料等,注意不要遗漏,如果没有成分信息,请返回空
+    "关键词": "xxx",//提取关键词信息,也即款式重点,如果没有关键词信息,请返回空
+    "商品细节": "xxx",//提取商品细节信息,确保信息提取完整,这部分信息通常是以①②③④⑤⑥⑦⑧⑨⑩或1、2、3、4、5、6、7、8、9等编号开头的段落,或者是商品特点、亮点描述信息。如果没有商品细节信息,请返回空。
+}
+
+##注意:请仔细阅读文本,确保提取的信息准确无误,不要遗漏任何关键信息。
+"""
+
+def content_extract(user_prompt):
+    completion = client.chat.completions.create(
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ],
+        model="ep-20241018084532-cgm84",
+        temperature = 0.01
+    )
+    return completion.choices[0].message.content