Qwen2.5-3B,四卡训练不起来 · Issue #61 · Visual-Agent/DeepEyes · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content
Qwen2.5-3B,四卡训练不起来 #61
Open
@qinguangming1999

Description

@qinguangming1999

我有四张 A100，下面是我的 config：
# Launches verl PPO/GRPO training (verl.trainer.main_ppo) on a single node
# with 4 GPUs, with the agent rollout enabled.
#
# Requires bash (uses an array and `set -o pipefail`).
# Optional env: WORLD_SIZE - number of nodes; defaults to 1 when unset/empty.
set -x
# Without pipefail the pipeline's exit status is tee's, which would hide a
# trainer crash from whoever invoked this script.
set -o pipefail

PROJECT_NAME="agent_vlagent"
EXPERIMENT_NAME="debug_for_single_node"

export SAVE_CHECKPOINT_DIR=verl_checkpoints

export VLLM_ATTENTION_BACKEND=XFORMERS # vllm + qwen2-7b with flash_attn has some issues

BASEDIR=data
# NOTE(review): VISUAL_DATASET_TRAIN_0_6_2 and VISUAL_DATASET_TEST are defined
# but not referenced by any override below - confirm whether that is intended.
VISUAL_DATASET_TRAIN_0_6_2="${BASEDIR}/data_v0.6.2_reason.parquet"
VISUAL_DATASET_TRAIN_0_1_2="${BASEDIR}/data_0.1.2_visual_toolbox_v2.parquet"
VISUAL_DATASET_TRAIN_0_8="${BASEDIR}/data_v0.8_visual_toolbox_v2.parquet"
VISUAL_DATASET_TEST="${BASEDIR}/seekworld_test.parquet"
EUREKA_DATASET_TRAIN="${BASEDIR}/data_thinklite_reasoning_acc.parquet"

REF_MODEL_PATH=../Qwen/Qwen2.5-VL-3B

# tee does not create missing parent directories for its output file.
mkdir -p ./logs

# All Hydra overrides live in one array so they are passed as arguments to a
# single python3 invocation instead of being executed as separate commands.
# The bracketed list literals with single quotes are left unquoted on purpose:
# the shell strips the quotes, so the trainer receives e.g. [console,wandb,...],
# exactly as before.
train_args=(
  +debug=False
  +vs_debug=False
  "data.train_files=[${VISUAL_DATASET_TRAIN_0_1_2},${VISUAL_DATASET_TRAIN_0_8},${EUREKA_DATASET_TRAIN}]"
  # NOTE(review): validation reuses the EUREKA training parquet - confirm.
  "data.val_files=[${EUREKA_DATASET_TRAIN}]"
  data.train_batch_size=4
  data.max_prompt_length=8192
  data.max_response_length=20480
  data.return_raw_chat=True
  data.filter_overlong_prompts=True
  algorithm.adv_estimator=grpo
  algorithm.kl_ctrl.kl_coef=0.0
  "actor_rollout_ref.model.path=${REF_MODEL_PATH}"
  actor_rollout_ref.model.use_remove_padding=True
  actor_rollout_ref.actor.optim.lr=1e-6
  actor_rollout_ref.actor.ppo_mini_batch_size=2
  actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1
  actor_rollout_ref.actor.use_kl_loss=False
  actor_rollout_ref.actor.kl_loss_coef=0.0
  actor_rollout_ref.actor.kl_loss_type=low_var_kl
  actor_rollout_ref.actor.entropy_coeff=0.0
  actor_rollout_ref.actor.checkpoint.contents=['model','hf_model','optimizer','extra']
  actor_rollout_ref.actor.ulysses_sequence_parallel_size=1
  actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=1
  actor_rollout_ref.rollout.tensor_model_parallel_size=1
  actor_rollout_ref.rollout.name=vllm
  actor_rollout_ref.rollout.n=2
  actor_rollout_ref.rollout.max_num_batched_tokens=32768
  actor_rollout_ref.rollout.gpu_memory_utilization=0.9
  actor_rollout_ref.rollout.enforce_eager=False
  actor_rollout_ref.rollout.free_cache_engine=False
  actor_rollout_ref.rollout.enable_chunked_prefill=False
  actor_rollout_ref.actor.fsdp_config.param_offload=True
  actor_rollout_ref.actor.fsdp_config.optimizer_offload=True
  actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1
  actor_rollout_ref.ref.fsdp_config.param_offload=True
  actor_rollout_ref.rollout.agent.activate_agent=True
  actor_rollout_ref.rollout.agent.tool_name_key=env_name
  actor_rollout_ref.rollout.agent.single_response_max_tokens=10240
  actor_rollout_ref.rollout.agent.max_turns=3
  actor_rollout_ref.rollout.agent.concurrent_workers=1
  actor_rollout_ref.rollout.agent.show_tqdm=True
  trainer.critic_warmup=0
  trainer.logger=['console','wandb','rl_logging_board']
  trainer.val_before_train=False
  trainer.n_gpus_per_node=4
  # An unset WORLD_SIZE would otherwise yield an empty "trainer.nnodes=".
  "trainer.nnodes=${WORLD_SIZE:-1}"
  trainer.save_freq=8
  trainer.test_freq=10000
  "trainer.project_name=${PROJECT_NAME}"
  "trainer.experiment_name=${EXPERIMENT_NAME}"
  "trainer.default_local_dir=${SAVE_CHECKPOINT_DIR}/${PROJECT_NAME}/${EXPERIMENT_NAME}"
  "+trainer.tensorboard_dir=${SAVE_CHECKPOINT_DIR}/logs/tensorboard"
  "+trainer.rl_logging_board_dir=${SAVE_CHECKPOINT_DIR}/logs/rl_logging_board"
  trainer.total_epochs=32
)

# stderr is merged into stdout so the log file captures both streams.
PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo "${train_args[@]}" 2>&1 \
  | tee "./logs/${EXPERIMENT_NAME}.log"

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions

      0