allenai
diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml
Lines changed: 3 additions & 9 deletions
diff --git a/mason.py b/mason.py
Lines changed: 24 additions & 22 deletions
diff --git a/open_instruct/actor_manager.py b/open_instruct/actor_manager.py
Lines changed: 3 additions & 3 deletions
diff --git a/open_instruct/benchmark_generators.py b/open_instruct/benchmark_generators.py
Lines changed: 0 additions & 2 deletions
diff --git a/open_instruct/dataset_transformation.py b/open_instruct/dataset_transformation.py
Lines changed: 45 additions & 0 deletions
@@ -17,14 +17,8 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@v4
         with:
-          version: "0.5.11"
-      - name: Set up Python
-        run: uv python install 3.10
-      - name: Install dependencies
-        run: uv sync --frozen --only-group dev
+          version: "0.8.8"
       - name: Code quality
         run: |
-          source .venv/bin/activate
-          ruff format --check --diff open_instruct
-          ruff check --exit-non-zero-on-fix open_instruct
-        
+          uv run ruff format --check --diff open_instruct mason.py
+          uv run ruff check --exit-non-zero-on-fix open_instruct mason.py
@@ -13,6 +13,8 @@
 from rich.console import Console
 from rich.text import Text
 
+from open_instruct.utils import GCP_CLUSTERS, INTERCONNECT_CLUSTERS, WEKA_CLUSTERS
+
 console = Console()
 
 
@@ -87,11 +89,6 @@ def parse_env_var(env_var_str: str) -> dict[str, str]:
     return {"name": name, "value": value}
 
 
-WEKA_CLUSTERS = ["ai2/jupiter", "ai2/saturn", "ai2/titan", "ai2/neptune", "ai2/ceres", "ai2/triton", "ai2/rhea"]
-GCP_CLUSTERS = ["ai2/augusta"]
-
-INTERCONNECT_CLUSTERS = ["ai2/jupiter", "ai2/ceres", "ai2/titan", "ai2/augusta"]
-
 # by default, we turn off vllm compile cache
 # torch compile caching seems consistently broken, but the actual compiling isn't.
 # Not sure why, for now we have disabled the caching (VLLM_DISABLE_COMPILE_CACHE=1).
@@ -589,24 +586,29 @@ def make_internal_command(command: list[str], args: argparse.Namespace, whoami:
                     model_revision = command[idx + 1]
                     break
 
-            commit_hash = get_commit_hash(model_name_or_path, model_revision, "config.json", "model")
-            if os.path.exists(model_name_or_path):
-                path = model_name_or_path
-                assert args.gs_model_name is not None, "for local models to upload to gs, you must set --gs_model_name"
-                model_name_or_path = args.gs_model_name
-                commit_hash = hashlib.md5(model_name_or_path.encode("utf-8")).hexdigest()[:8]
-                console.log(
-                    f"Local model is already downloaded, using gs_model_name {model_name_or_path}, with hash of model path {commit_hash}"
-                )
+            if model_name_or_path.startswith("gs://"):
+                gs_saved_path = model_name_or_path
             else:
-                download_from_hf(model_name_or_path, model_revision)  # first download the model
-                path = download_from_hf(model_name_or_path, model_revision)  # then get the path
-            gs_saved_path = f"gs://ai2-llm/post-training/deletable_cache_models/{model_name_or_path}/{commit_hash}"
-            gs_folder = gs_folder_exists(
-                gs_saved_path
-            )  # race condition exists, but it's fine since we are launching mason sequentially
-            if not gs_folder:
-                upload_to_gs_bucket(path, gs_saved_path)
+                commit_hash = get_commit_hash(model_name_or_path, model_revision, "config.json", "model")
+                if os.path.exists(model_name_or_path):
+                    path = model_name_or_path
+                    assert args.gs_model_name is not None, (
+                        "for local models to upload to gs, you must set --gs_model_name"
+                    )
+                    model_name_or_path = args.gs_model_name
+                    commit_hash = hashlib.md5(model_name_or_path.encode("utf-8")).hexdigest()[:8]
+                    console.log(
+                        f"Local model is already downloaded, using gs_model_name {model_name_or_path}, with hash of model path {commit_hash}"
+                    )
+                else:
+                    download_from_hf(model_name_or_path, model_revision)  # first download the model
+                    path = download_from_hf(model_name_or_path, model_revision)  # then get the path
+                gs_saved_path = f"gs://ai2-llm/post-training/deletable_cache_models/{model_name_or_path}/{commit_hash}"
+                gs_folder = gs_folder_exists(
+                    gs_saved_path
+                )  # race condition exists, but it's fine since we are launching mason sequentially
+                if not gs_folder:
+                    upload_to_gs_bucket(path, gs_saved_path)
 
             download_path = gs_saved_path.replace("gs://", "/gs/")
             download_path_without_last_folder = download_path.rsplit("/", 1)[0]
 
@@ -110,9 +110,9 @@ async def api_status():
                 "queues": queues_data,
                 "token_stats": self.get_token_stats(),
                 "timing_stats": self.get_timing_stats(),
-                "kv_cache_max_concurrency": self._kv_cache_max_concurrency,
-                # This is less confusing to users.
-                "inference_batch_size": self._args.inference_batch_size * self._args.num_samples_per_prompt_rollout,
+                "concurrency_per_engine": self._kv_cache_max_concurrency,
+                "total_concurrency": self._kv_cache_max_concurrency * self._args.vllm_num_engines,
+                "batch_size": self._args.num_unique_prompts_rollout * self._args.num_samples_per_prompt_rollout,
             }
 
         def run_server():
 
@@ -263,8 +263,6 @@ def setup_vllm_engines(
         prompt_queue=param_prompt_Q,
         results_queue=inference_results_Q,
         actor_manager=actor_manager,
-        inference_batch_size=args.inference_batch_size,
-        use_fp8_kv_cache=args.use_fp8_kv_cache,
         inflight_updates=args.inflight_updates,
     )
 
 
@@ -438,6 +438,51 @@ def visualize_token_role(tokens: list[int], masks: list[int], tokenizer: PreTrai
         "{% endif %}"
         "{% endfor %}"
     ),
+    "olmo_thinker_remove_intermediate_thinking": (
+        "{% set has_system = messages|selectattr('role', 'equalto', 'system')|list|length > 0 %}"
+        "{% if not has_system %}"
+        "{{ '<|im_start|>system\nYou are a helpful AI assistant.<|im_end|>\n' }}"
+        "{% endif %}"
+        "{% for message in messages %}"
+        "{% if message['role'] == 'system' %}"
+        "{{ '<|im_start|>system\n' + message['content'] }}"
+        "{% if message.get('functions', none) is not none %}"
+        "{{ ' <functions>' + message['functions'] + '</functions><|im_end|>\n' }}"
+        "{% else %}"
+        "{{ ' You do not currently have access to any functions. <functions></functions><|im_end|>\n' }}"
+        "{% endif %}"
+        "{% elif message['role'] == 'user' %}"
+        "{% if message.get('functions', none) is not none %}"
+        "{{ '<|im_start|>user\n' + message['content'] + '\n' + '<functions>' + message['functions'] + '</functions><|im_end|>\n' }}"
+        "{% else %}"
+        "{{ '<|im_start|>user\n' + message['content'] + '<|im_end|>\n' }}"
+        "{% endif %}"
+        "{% elif message['role'] == 'assistant' %}"
+        "{{ '<|im_start|>assistant\n' }}"
+        "{% set content = message.get('content', none) %}"
+        "{% if content is not none %}"
+        "{% set content = content | string %}"
+        "{% if not loop.last and '</think>' in content and '<think>' in content %}"
+        "{% set content = content.split('</think>')[-1].lstrip('\\n') %}"
+        "{% endif %}"
+        "{{ content }}"
+        "{% endif %}"
+        "{% if message.get('function_calls', none) is not none %}"
+        "{{ '<function_calls>' + message['function_calls'] + '</function_calls>' }}"
+        "{% endif %}"
+        "{% if not loop.last %}"
+        "{{ '<|im_end|>' + '\n' }}"
+        "{% else %}"
+        "{{ eos_token }}"
+        "{% endif %}"
+        "{% elif message['role'] == 'environment' %}"
+        "{{ '<|im_start|>environment\n' + message['content'] + '<|im_end|>\n' }}"
+        "{% endif %}"
+        "{% if loop.last and add_generation_prompt %}"
+        "{{ '<|im_start|>assistant\n<think>' }}"
+        "{% endif %}"
+        "{% endfor %}"
+    ),
     "olmo_thinker_no_think_sft_tokenization": (
         "{% set has_system = messages|selectattr('role', 'equalto', 'system')|list|length > 0 %}"
         "{% if not has_system %}"
Original file line number	Diff line number	Diff line change
`@@ -263,8 +263,6 @@ def setup_vllm_engines(`
`263`	`263`	`prompt_queue=param_prompt_Q,`
`264`	`264`	`results_queue=inference_results_Q,`
`265`	`265`	`actor_manager=actor_manager,`
`266`		`- inference_batch_size=args.inference_batch_size,`
`267`		`- use_fp8_kv_cache=args.use_fp8_kv_cache,`
`268`	`266`	`inflight_updates=args.inflight_updates,`
`269`	`267`	`)`
`270`	`268`