"""Process-wide configuration: CLI options, LLM configs, paths and shared models.

Importing this module parses the command line and loads the PDF converter and
the BGE-M3 embedding model, so the import is expensive and has side effects.
"""
import argparse
import os
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from FlagEmbedding import BGEM3FlagModel
from fastapi import WebSocket
from util import get_cus_logger
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict


def load_parse_args(argv=None):
    """Parse the command-line options that configure the LLM backend.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what the
            module-level call below relies on.

    Returns:
        argparse.Namespace with ``base_url``, ``api_key``, ``model`` and
        ``static_dir`` attributes.
    """
    # Create the ArgumentParser object.
    parser = argparse.ArgumentParser(
        description="Configure the LLM endpoint, model and working directory.")
    parser.add_argument('--base_url', type=str,
                        default="https://dashscope.aliyuncs.com/compatible-mode/v1",
                        help='llm base_url')
    # SECURITY: a credential is committed below as the fallback default. It is
    # kept only so existing deployments keep working. Prefer the
    # DASHSCOPE_API_KEY environment variable or --api_key, and rotate/remove
    # the literal.
    parser.add_argument('--api_key', type=str,
                        default=(os.environ.get("DASHSCOPE_API_KEY")
                                 or "sk-04b63960983445f980d85ff185a17876"),
                        help='llm api_key')
    parser.add_argument('--model', type=str, choices=['qwen-max', 'gpt-4'],
                        default='qwen-max', help='The model to use')
    parser.add_argument('--static_dir', type=str, default="/workspace",
                        help='the directory for the code to work')

    # Parse the command-line arguments.
    return parser.parse_args(argv)


def _build_llm_config(model, cli_args):
    """Return an LLM config dict for *model* using the parsed endpoint and key."""
    return {
        "config_list": [
            {
                "model": model,
                "api_key": cli_args.api_key,
                # Endpoint URL; the original note says a vLLM URL needs '/v1' added.
                "base_url": cli_args.base_url,
            }
        ],
        "cache_seed": None,  # Turns off caching, useful for testing different models
        "temperature": 0.5,
    }


nltk_path = '/root/nltk_data/tokenizers'
args = load_parse_args()
# Honour --static_dir; its default is '/workspace', the value previously
# hard-coded here.
static_dir = args.static_dir

# Config for the model selected on the command line.
llm_config = _build_llm_config(args.model, args)
# Same endpoint and key, but pinned to the 'deepseek-r1' model.
llm_config_ds = _build_llm_config('deepseek-r1', args)

milvus_url = "http://10.41.1.57:19530"
bge_model_path = '/model/bge-m3'
bge_rerank_path = '/model/bge-reranker-v2-m3'
BASE_UPLOAD_DIRECTORY = '/workspace'
# Template; callers are expected to fill in ``client_id`` (e.g. via str.format).
upload_path = """/workspace/{client_id}/"""

converter = PdfConverter(
    artifact_dict=create_model_dict(),
)
# Setting use_fp16 to True speeds up computation with a slight performance degradation
bge_model = BGEM3FlagModel(bge_model_path, use_fp16=True, device='cuda:0')
# Reranker: load the tokenizer and the sequence-classification model from the
# same path, place the model on the first CUDA device and switch it to
# evaluation mode.
bge_rerank_tokenizer = AutoTokenizer.from_pretrained(bge_rerank_path)
bge_rerank_model = (
    AutoModelForSequenceClassification.from_pretrained(bge_rerank_path)
    .to('cuda:0')  # returns the same module, so the chain keeps one object
    .eval()
)