Skip to content

Commit e3f4341

Browse files
update olmo thinker template (#1151)
* update olmo-thinker configs

* Update open_instruct/dataset_transformation.py

Co-authored-by: Hamish Ivison <[email protected]>

---------

Co-authored-by: Hamish Ivison <[email protected]>
1 parent 2e01208 commit e3f4341

File tree

1 file changed

+43
-3
lines changed

1 file changed

+43
-3
lines changed

open_instruct/dataset_transformation.py

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ def visualize_token_role(tokens: list[int], masks: list[int], tokenizer: PreTrai
318318
),
319319
# olmo-core-compatible chat templates:
320320
# TODO: unify the olmo chat templates below and send variables through the tokenizer's apply_chat_template kwargs
321-
"olmo": (
321+
"olmo_old": (
322322
"{% set has_system = messages|selectattr('role', 'equalto', 'system')|list|length > 0 %}"
323323
"{% if not has_system %}"
324324
"{{ '<|im_start|>system\nYou are OLMo, a helpful function-calling AI assistant built by Ai2. Your date cutoff is November 2024, and your model weights are available at https://huggingface.co/allenai. You do not currently have access to any functions. <functions></functions><|im_end|>\n' }}"
@@ -361,7 +361,7 @@ def visualize_token_role(tokens: list[int], masks: list[int], tokenizer: PreTrai
361361
"olmo_thinker": (
362362
"{% set has_system = messages|selectattr('role', 'equalto', 'system')|list|length > 0 %}"
363363
"{% if not has_system %}"
364-
"{{ '<|im_start|>system\nYou are OLMo, a helpful function-calling AI assistant built by Ai2. Your date cutoff is November 2024, and your model weights are available at https://huggingface.co/allenai. You do not currently have access to any functions. <functions></functions><|im_end|>\n' }}"
364+
"{{ '<|im_start|>system\nYou are a helpful AI assistant.<|im_end|>\n' }}"
365365
"{% endif %}"
366366
"{% for message in messages %}"
367367
"{% if message['role'] == 'system' %}"
@@ -398,7 +398,7 @@ def visualize_token_role(tokens: list[int], masks: list[int], tokenizer: PreTrai
398398
"{% endif %}"
399399
"{% endfor %}"
400400
),
401-
"olmo_thinker_no_think": (
401+
"olmo_thinker_no_think_7b": (
402402
"{% set has_system = messages|selectattr('role', 'equalto', 'system')|list|length > 0 %}"
403403
"{% if not has_system %}"
404404
"{{ '<|im_start|>system\nYou are Olmo, a helpful AI assistant built by Ai2. Your date cutoff is December 2024, and your model weights are available at https://huggingface.co/allenai.<|im_end|>\n' }}"
@@ -438,6 +438,46 @@ def visualize_token_role(tokens: list[int], masks: list[int], tokenizer: PreTrai
438438
"{% endif %}"
439439
"{% endfor %}"
440440
),
441+
"olmo_thinker_no_think_sft_tokenization": (
442+
"{% set has_system = messages|selectattr('role', 'equalto', 'system')|list|length > 0 %}"
443+
"{% if not has_system %}"
444+
"{{ '<|im_start|>system\nYou are a helpful AI assistant.<|im_end|>\n' }}"
445+
"{% endif %}"
446+
"{% for message in messages %}"
447+
"{% if message['role'] == 'system' %}"
448+
"{{ '<|im_start|>system\n' + message['content'] }}"
449+
"{% if message.get('functions', none) is not none %}"
450+
"{{ ' <functions>' + message['functions'] + '</functions><|im_end|>\n' }}"
451+
"{% else %}"
452+
"{{ ' You do not currently have access to any functions. <functions></functions><|im_end|>\n' }}"
453+
"{% endif %}"
454+
"{% elif message['role'] == 'user' %}"
455+
"{% if message.get('functions', none) is not none %}"
456+
"{{ '<|im_start|>user\n' + message['content'] + '\n' + '<functions>' + message['functions'] + '</functions><|im_end|>\n' }}"
457+
"{% else %}"
458+
"{{ '<|im_start|>user\n' + message['content'] + '<|im_end|>\n' }}"
459+
"{% endif %}"
460+
"{% elif message['role'] == 'assistant' %}"
461+
"{{ '<|im_start|>assistant\n' }}"
462+
"{% if message.get('content', none) is not none %}"
463+
"{{ message['content'] }}"
464+
"{% endif %}"
465+
"{% if message.get('function_calls', none) is not none %}"
466+
"{{ '<function_calls>' + message['function_calls'] + '</function_calls>' }}"
467+
"{% endif %}"
468+
"{% if not loop.last %}"
469+
"{{ '<|im_end|>' + '\n' }}"
470+
"{% else %}"
471+
"{{ eos_token }}"
472+
"{% endif %}"
473+
"{% elif message['role'] == 'environment' %}"
474+
"{{ '<|im_start|>environment\n' + message['content'] + '<|im_end|>\n' }}"
475+
"{% endif %}"
476+
"{% if loop.last and add_generation_prompt %}"
477+
"{{ '<|im_start|>assistant\n' }}"
478+
"{% endif %}"
479+
"{% endfor %}"
480+
),
441481
"olmo_thinker_r1_style": (
442482
"A conversation between user and assistant. "
443483
"The user asks a question, and the assistant solves it. "

0 commit comments

Comments
 (0)