add
# Setup, then launch two vLLM servers (Qwen2.5 3B / 7B).
# Copy-paste mangling fixed: en-dashes restored to `--`, smart quotes to `"`,
# and the commands split onto separate lines (they were jammed together).
cd /hy-tmp/jointrouter
pip install vllm==0.11.0

# Qwen2.5-3B-Instruct on GPU 0, port 30000.
CUDA_VISIBLE_DEVICES=0 VLLM_USE_V1=1 vllm serve /hy-tmp/Qwen2.5-3B-Instruct \
  --dtype auto -tp 1 --gpu-memory-utilization 0.75 \
  --port 30000 --max-model-len 10600 \
  --enable-timeout-notification --request-timeout 30.0 \
  --load-balancer-url "http://localhost:8080"

# Qwen2.5-7B-Instruct on GPU 1, port 30001.
CUDA_VISIBLE_DEVICES=1 VLLM_USE_V1=1 vllm serve /hy-tmp/Qwen2.5-7B-Instruct \
  --dtype auto -tp 1 --gpu-memory-utilization 0.75 \
  --port 30001 --max-model-len 10600 \
  --enable-timeout-notification --request-timeout 30.0 \
  --load-balancer-url "http://localhost:8080"
DeepSeek 7B:
# DeepSeek-R1-Distill-Qwen-7B on GPU 0, port 8080.
CUDA_VISIBLE_DEVICES=0 VLLM_USE_V1=1 vllm serve /hy-tmp/DeepSeek-R1-Distill-Qwen-7B \
  --dtype auto -tp 1 --gpu-memory-utilization 0.75 \
  --port 8080 --max-model-len 12600 \
  --enable-timeout-notification --request-timeout 30.0 \
  --load-balancer-url "http://i-2.gpushare.com:26224"
# NOTE: api_prefix_urls in my_scheduler.py must match the vLLM ports above,
# otherwise the router cannot reach the models.
# (原注:my_scheduler.py 里的 api_prefix_urls 必须和这几个 vLLM 端口对应上,不然路由器连不到模型。)
python3 -m spec_scheduling.myclient --strategy round_robin --port 8080
# Profile the 3B server (index 0) on the training split; 1470 requests total.
python3 -m spec_scheduling.multi_turn_example \
  --strategy round_robin \
  --data_type prediction_results \
  --data_path /hy-tmp/jointrouter/Route-To-Reason-main/data/split_data/train.csv \
  --profile \
  --profile_server_index 0 \
  --profile_result_name qwen3b_train/result_qwen3b_train \
  --num_request 1470
加入新的数据集
# HumanEval generation with the selected model (direct strategy).
# The original line was garbled by a math-rendering artifact
# (`/hy-tmp/MODEL\ −−model−nameMODEL …`); reconstructed as $MODEL expansion.
export MODEL=Meta-Llama-3.1-8B
python3 /hy-tmp/humaneval_tools/run_humaneval_local.py \
  --dataset /hy-tmp/HumanEval.jsonl.gz \
  --model-path "/hy-tmp/${MODEL}" \
  --model-name "${MODEL}" \
  --strategy direct \
  --max-new-tokens 10240 \
  --output "/hy-tmp/humaneval_${MODEL}_direct.jsonl"
# Score the generated completions against the HumanEval dataset.
python3 /hy-tmp/humaneval_tools/evaluate_humaneval.py \
  --dataset /hy-tmp/HumanEval.jsonl.gz \
  --generations /hy-tmp/humaneval_qwen3b_direct.jsonl \
  --results-jsonl /hy-tmp/humaneval_qwen3b_direct_results.jsonl \
  --results-csv /hy-tmp/humaneval_qwen3b_direct_test.csv
# Old dataset (旧数据集): route test with Qwen2.5-3B on the test split.
python /hy-tmp/humaneval_tools/run_route_test_local.py \
  --dataset /hy-tmp/jointrouter/Route-To-Reason-main/data/split_data/test.csv \
  --model-path /hy-tmp/Qwen2.5-3B \
  --model-name Qwen2.5-3B \
  --dataset-model-filter Qwen2.5-3B-Instruct \
  --output /hy-tmp/route_test_results.jsonl \
  --token-positions 1,2,4,8,16,last_token
# Same route test with Qwen2.5-7B-Instruct.
python /hy-tmp/humaneval_tools/run_route_test_local.py \
  --dataset /hy-tmp/jointrouter/Route-To-Reason-main/data/split_data/test.csv \
  --model-path /hy-tmp/Qwen2.5-7B-Instruct \
  --model-name Qwen2.5-7B-Instruct \
  --dataset-model-filter Qwen2.5-7B-Instruct \
  --output /hy-tmp/route_test_results_7b.jsonl \
  --token-positions 1,2,4,8,16,last_token
版权所有:中国计算机学会技术支持:开源发展技术委员会 京ICP备13000930号-9 京公网安备 11010802032778号
# Setup, then launch Qwen2.5-3B-Instruct on GPU 0, port 30000.
# Fixed: commands split apart, en-dashes restored to `--`, smart quotes to `"`,
# and `\` line continuations added so the multi-line command actually parses.
cd /hy-tmp/jointrouter
pip install vllm==0.11.0
CUDA_VISIBLE_DEVICES=0 VLLM_USE_V1=1 vllm serve /hy-tmp/Qwen2.5-3B-Instruct \
  --dtype auto -tp 1 --gpu-memory-utilization 0.75 \
  --port 30000 --max-model-len 10600 \
  --enable-timeout-notification --request-timeout 30.0 \
  --load-balancer-url "http://localhost:8080"
# Qwen2.5-7B-Instruct on GPU 1, port 30001.
CUDA_VISIBLE_DEVICES=1 VLLM_USE_V1=1 vllm serve /hy-tmp/Qwen2.5-7B-Instruct \
  --dtype auto -tp 1 --gpu-memory-utilization 0.75 \
  --port 30001 --max-model-len 10600 \
  --enable-timeout-notification --request-timeout 30.0 \
  --load-balancer-url "http://localhost:8080"
DeepSeek 7B:
# DeepSeek-R1-Distill-Qwen-7B on GPU 0, port 8080.
CUDA_VISIBLE_DEVICES=0 VLLM_USE_V1=1 vllm serve /hy-tmp/DeepSeek-R1-Distill-Qwen-7B \
  --dtype auto -tp 1 --gpu-memory-utilization 0.75 \
  --port 8080 --max-model-len 12600 \
  --enable-timeout-notification --request-timeout 30.0 \
  --load-balancer-url "http://i-2.gpushare.com:26224"
# NOTE: api_prefix_urls in my_scheduler.py must match the vLLM ports above,
# otherwise the router cannot reach the models.
# (原注:my_scheduler.py 里的 api_prefix_urls 必须和这几个 vLLM 端口对应上,不然路由器连不到模型。)
python3 -m spec_scheduling.myclient --strategy round_robin --port 8080
# Profile the 3B server (index 0) on the training split; 1470 requests total.
python3 -m spec_scheduling.multi_turn_example \
  --strategy round_robin \
  --data_type prediction_results \
  --data_path /hy-tmp/jointrouter/Route-To-Reason-main/data/split_data/train.csv \
  --profile \
  --profile_server_index 0 \
  --profile_result_name qwen3b_train/result_qwen3b_train \
  --num_request 1470
加入新的数据集
DeepSeek-R1-Distill-Qwen-7B
# HumanEval generation with the selected model (direct strategy).
# The `--model-path`/`--model-name` line was garbled by a math-rendering
# artifact; reconstructed as $MODEL expansion to match the $MODEL in --output.
export MODEL=Meta-Llama-3.1-8B
python3 /hy-tmp/humaneval_tools/run_humaneval_local.py \
  --dataset /hy-tmp/HumanEval.jsonl.gz \
  --model-path "/hy-tmp/${MODEL}" \
  --model-name "${MODEL}" \
  --strategy direct \
  --max-new-tokens 10240 \
  --output "/hy-tmp/humaneval_${MODEL}_direct.jsonl"
# Score the generated completions against the HumanEval dataset.
python3 /hy-tmp/humaneval_tools/evaluate_humaneval.py \
  --dataset /hy-tmp/HumanEval.jsonl.gz \
  --generations /hy-tmp/humaneval_qwen3b_direct.jsonl \
  --results-jsonl /hy-tmp/humaneval_qwen3b_direct_results.jsonl \
  --results-csv /hy-tmp/humaneval_qwen3b_direct_test.csv
# Old dataset (旧数据集): route test with Qwen2.5-3B on the test split.
python /hy-tmp/humaneval_tools/run_route_test_local.py \
  --dataset /hy-tmp/jointrouter/Route-To-Reason-main/data/split_data/test.csv \
  --model-path /hy-tmp/Qwen2.5-3B \
  --model-name Qwen2.5-3B \
  --dataset-model-filter Qwen2.5-3B-Instruct \
  --output /hy-tmp/route_test_results.jsonl \
  --token-positions 1,2,4,8,16,last_token
# Same route test with Qwen2.5-7B-Instruct.
python /hy-tmp/humaneval_tools/run_route_test_local.py \
  --dataset /hy-tmp/jointrouter/Route-To-Reason-main/data/split_data/test.csv \
  --model-path /hy-tmp/Qwen2.5-7B-Instruct \
  --model-name Qwen2.5-7B-Instruct \
  --dataset-model-filter Qwen2.5-7B-Instruct \
  --output /hy-tmp/route_test_results_7b.jsonl \
  --token-positions 1,2,4,8,16,last_token