Fix the remainder of our configs by wizeng23 · Pull Request #1356 · oumi-ai/oumi · GitHub

Fix the remainder of our configs #1356

Merged: 3 commits, Feb 4, 2025
2 changes: 1 addition & 1 deletion configs/examples/misc/dev_gcp_job.yaml
@@ -19,7 +19,7 @@ resources:
# Multi GPU options: "A100:2", "L4:4"
accelerators: "A100:1"

disk_size: 200 # Disk size in GB
disk_size: 1000 # Disk size in GB
# region: us-central1 # Uncomment this line to only consider a specific region.

use_spot: false
4 changes: 1 addition & 3 deletions configs/projects/chatqa/chatqa_stage1_train.yaml
@@ -14,15 +14,14 @@ model:
# torch_dtype_str: "float16"
# model_max_length: 1024

# model_name: "meta-llama/Meta-Llama-3-8B-Instruct"
# model_name: "meta-llama/Llama-3.1-8B-Instruct"
# torch_dtype_str: "bfloat16"
# model_max_length: 4096

model_name: "microsoft/Phi-3-mini-4k-instruct"
torch_dtype_str: "bfloat16"
model_max_length: 4096

trust_remote_code: True
attn_implementation: "sdpa"

data:
@@ -50,7 +49,6 @@ training:
use_reentrant: False # So gradients flow from frozen (non FT) input embeddings

optimizer: "adamw_torch_fused"
# optimizer: "adamw_torch"
learning_rate: 0.000005
lr_scheduler_type: "cosine_with_min_lr"
lr_scheduler_kwargs:
1 change: 0 additions & 1 deletion configs/projects/chatqa/chatqa_stage2_train.yaml
@@ -80,7 +80,6 @@ training:
use_reentrant: False # So gradients flow from frozen (non FT) input embeddings

optimizer: "adamw_torch_fused"
# optimizer: "adamw_torch"
learning_rate: 0.000005
lr_scheduler_type: "cosine_with_min_lr"
lr_scheduler_kwargs:
2 changes: 2 additions & 0 deletions configs/recipes/llama3_1/sft/8b_full/longctx_train.yaml
@@ -47,6 +47,8 @@ data:
- dataset_name: "HuggingFaceFW/fineweb-edu"
subset: "sample-10BT"
split: "train"
dataset_kwargs:
seq_length: 32_768
Collaborator: For general info, curious how the 32_768 notation is supported: is it done by YAML or by Python?

Contributor Author: I'm not sure exactly; I copied what was above (which I think is from Oussama).
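
For reference, the underscore here is a YAML 1.1 digit-separator convention. PyYAML, the parser that most Python config tooling (including OmegaConf) is built on, still accepts it: the underscores are stripped during scalar resolution, so the value reaches Python as the plain int 32768. A minimal sketch, not part of this PR, assuming the config file is ultimately loaded through PyYAML:

import yaml

snippet = """
dataset_kwargs:
  seq_length: 32_768
"""

# PyYAML follows YAML 1.1, which permits underscores in integer scalars;
# it strips them before conversion, so the field arrives as a normal int.
parsed = yaml.safe_load(snippet)
print(parsed)                                        # {'dataset_kwargs': {'seq_length': 32768}}
print(type(parsed["dataset_kwargs"]["seq_length"]))  # <class 'int'>

Strict YAML 1.2 parsers dropped this rule and may read 32_768 as a string, so the notation is a parser-level convenience rather than a YAML-spec guarantee.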

target_col: "text"
use_async_dataset: True
stream: True
1 change: 0 additions & 1 deletion configs/recipes/llama3_3/inference/70b_infer.yaml
@@ -11,7 +11,6 @@

model:
model_name: "meta-llama/Llama-3.3-70B-Instruct"
adapter_model: null # Update for LoRA-tuned models.
model_max_length: 2048
torch_dtype_str: "bfloat16"
attn_implementation: "sdpa"
1 change: 0 additions & 1 deletion configs/recipes/llama3_3/inference/70b_vllm_infer.yaml
@@ -11,7 +11,6 @@

model:
model_name: "meta-llama/Llama-3.3-70B-Instruct"
adapter_model: null # Update for LoRA-tuned models.
model_max_length: 2048
torch_dtype_str: "bfloat16"
attn_implementation: "sdpa"
4 changes: 2 additions & 2 deletions configs/recipes/phi3/dpo/nvidia_80g_train.yaml
@@ -24,8 +24,8 @@ training:
use_peft: true
output_dir: "output/phi3.dpo"
trainer_type: "TRL_DPO"
per_device_train_batch_size: 30 # Each batch seems to be approx. 1.8GB
gradient_accumulation_steps: 8
per_device_train_batch_size: 4
gradient_accumulation_steps: 64

# Use for debugging purposes
# max_steps: 10
6 changes: 6 additions & 0 deletions configs/recipes/smollm/evaluation/135m/quickstart_alpaca_v2_eval.yaml
@@ -1,5 +1,9 @@
# Quickstart Alpaca v2 eval config for SmolLM 135M Instruct.
#
# Requirements:
# - Run `pip install oumi[evaluation]` or `pip install alpaca_eval`
# - Set the env var `OPENAI_API_KEY` to your OpenAI API key.
#
# Usage:
# oumi evaluate -c configs/recipes/smollm/evaluation/135m/quickstart_alpaca_v2_eval.yaml
#
@@ -24,3 +28,5 @@ tasks:
- evaluation_platform: alpaca_eval
eval_kwargs:
version: 2.0

# engine: VLLM # Uncomment if you're running on a CUDA GPU.
4 changes: 2 additions & 2 deletions notebooks/Oumi - A Tour.ipynb
@@ -83,7 +83,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)"
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)."
]
},
{
@@ -509,7 +509,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.11.11"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion notebooks/Oumi - Deploying a Job.ipynb
@@ -70,7 +70,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)"
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)."
]
},
{
2 changes: 1 addition & 1 deletion notebooks/Oumi - Distill a Large Model.ipynb
@@ -75,7 +75,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)"
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)."
]
},
{
2 changes: 1 addition & 1 deletion notebooks/Oumi - Finetuning Tutorial.ipynb
@@ -68,7 +68,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)"
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)."
]
},
{
2 changes: 1 addition & 1 deletion notebooks/Oumi - Launching Jobs on Custom Clusters.ipynb
@@ -67,7 +67,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)"
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)."
]
},
{
2 changes: 1 addition & 1 deletion notebooks/Oumi - Running Jobs Remotely.ipynb
@@ -93,7 +93,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)"
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)."
]
},
{
2 changes: 1 addition & 1 deletion notebooks/Oumi - Training CNN on Custom Dataset.ipynb
@@ -67,7 +67,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)"
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)."
]
},
{
2 changes: 1 addition & 1 deletion notebooks/Oumi - Using vLLM Engine for Inference.ipynb
@@ -73,7 +73,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)"
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)."
]
},
{
2 changes: 1 addition & 1 deletion notebooks/Oumi - Vision Language Models.ipynb
@@ -126,7 +126,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)"
"**WARNING:** After the first `pip install`, you may have to restart the notebook for the package updates to take effect (Colab Menu: `Runtime` -> `Restart Session`)."
]
},
{
5 changes: 2 additions & 3 deletions src/experimental/configs/projects/zephyr/evaluation/eval.yaml
@@ -10,11 +10,10 @@
# - Other eval configs: configs/**/evaluation/

model:
# Note: update model_name or adapter_model with your checkpoint directories
# Note: Update with your FFT or LoRA checkpoint
model_name: "mistralai/Mistral-7B-v0.1"
adapter_model: "your-checkpoint-dir"
trust_remote_code: True
#TODO currently below are not passed in LM_HARNESS
# TODO: currently below are not passed in LM_HARNESS
torch_dtype_str: "bfloat16"
model_max_length: 2048
chat_template: "zephyr"