123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
import argparse
import os

from fastapi import WebSocket
from FlagEmbedding import BGEM3FlagModel
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from util import get_cus_logger
def load_parse_args(argv=None):
    """Parse command-line options for the LLM service.

    Parameters
    ----------
    argv : list[str] | None
        Argument list to parse. ``None`` (the default) falls back to
        ``sys.argv[1:]``, preserving the original CLI behaviour while
        letting tests and embedders inject arguments explicitly.

    Returns
    -------
    argparse.Namespace
        Parsed options: ``base_url``, ``api_key``, ``model``, ``static_dir``.
    """
    parser = argparse.ArgumentParser(description="Process some integers.")
    parser.add_argument('--base_url', type=str,
                        default="https://dashscope.aliyuncs.com/compatible-mode/v1",
                        help='llm base_url')
    # SECURITY: an API key was hard-coded here in source control. Prefer the
    # DASHSCOPE_API_KEY environment variable; the literal fallback keeps
    # backward compatibility, but the leaked key should be rotated.
    parser.add_argument('--api_key', type=str,
                        default=os.environ.get(
                            "DASHSCOPE_API_KEY",
                            "sk-04b63960983445f980d85ff185a17876"),
                        help='llm api_key')
    parser.add_argument('--model', type=str, choices=['qwen-max', 'gpt-4'],
                        default='qwen-max', help='The model to use')
    parser.add_argument('--static_dir', type=str, default="/workspace",
                        help='the directory for the code to work')
    # Parse the (possibly injected) command-line arguments.
    return parser.parse_args(argv)
# --- Runtime configuration ------------------------------------------------
# NOTE(review): this module parses CLI arguments and builds config at import
# time; importing it from tests/tools therefore has side effects.

# Path to pre-downloaded NLTK tokenizer data.
nltk_path = '/root/nltk_data/tokenizers'

args = load_parse_args()

# Working directory for generated/static files.
static_dir = '/workspace'


def _make_llm_config(model):
    """Build an LLM config dict for *model* against the CLI-supplied
    endpoint and key; caching disabled, temperature fixed at 0.5."""
    return {
        "config_list": [
            {
                "model": model,
                "api_key": args.api_key,
                "base_url": args.base_url,
            }
        ],
        "cache_seed": None,  # Turns off caching, useful for testing different models
        "temperature": 0.5,
    }


# Config for the model selected on the command line (default: qwen-max).
llm_config = _make_llm_config(args.model)
# Config pinned to deepseek-r1 regardless of the --model flag.
llm_config_ds = _make_llm_config('deepseek-r1')

# Milvus vector-database endpoint.
# NOTE(review): hard-coded internal IP — consider making this configurable.
milvus_url = "http://10.41.1.57:19530"
# Local paths to the BGE embedding and reranker model weights.
bge_model_path = '/model/bge-m3'
bge_rerank_path = '/model/bge-reranker-v2-m3'
# Root directory where client uploads are stored.
BASE_UPLOAD_DIRECTORY = '/workspace'
# Per-client upload directory template; format with client_id.
upload_path = """/workspace/{client_id}/"""
# --- Model loading (import-time, GPU-resident) ----------------------------
# NOTE(review): all models below are loaded eagerly at import on cuda:0 —
# importing this module requires a GPU and the model files on disk.

# PDF-to-text converter built from the marker model registry.
converter = PdfConverter(
    artifact_dict=create_model_dict(),
)
# BGE-M3 embedding model on GPU 0.
bge_model = BGEM3FlagModel(bge_model_path, use_fp16=True, device='cuda:0') # Setting use_fp16 to True speeds up computation with a slight performance degradation
# Tokenizer and cross-encoder reranker (bge-reranker-v2-m3), moved to GPU 0
# and switched to inference mode (disables dropout etc.).
bge_rerank_tokenizer = AutoTokenizer.from_pretrained(bge_rerank_path)
bge_rerank_model = AutoModelForSequenceClassification.from_pretrained(bge_rerank_path)
bge_rerank_model.to('cuda:0')
bge_rerank_model.eval()
|