Skip to content

Commit f1e0063

Browse files
committed
add qwen3 chat template
1 parent 0cc2e29 commit f1e0063

File tree

1 file changed

+63
-0
lines changed

1 file changed

+63
-0
lines changed

open_instruct/dataset_transformation.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,69 @@ def visualize_token_role(tokens: list[int], masks: list[int], tokenizer: PreTrai
660660
"{% endif %}"
661661
"{% endfor %}"
662662
),
663+
"qwen2.5": (
664+
"{% if messages[0]['role'] == 'system' %}"
665+
"{{ '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}"
666+
"{% else %}"
667+
"{{ '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}"
668+
"{% endif %}"
669+
"{% for message in messages %}"
670+
"{% if message['role'] == 'user' or (message['role'] == 'system' and not loop.first) %}"
671+
"{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>\\n' }}"
672+
"{% elif message['role'] == 'assistant' %}"
673+
"{{ '<|im_start|>assistant\\n' + message['content'] }}"
674+
"{% if not loop.last %}"
675+
"{{ '<|im_end|>\\n' }}"
676+
"{% else %}"
677+
"{{ eos_token }}"
678+
"{% endif %}"
679+
"{% endif %}"
680+
"{% if loop.last and add_generation_prompt %}"
681+
"{{ '<|im_start|>assistant\\n' }}"
682+
"{% endif %}"
683+
"{% endfor %}"
684+
),
685+
"qwen3": (
686+
"{% if messages[0].role == 'system' %}"
687+
"{{ '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}"
688+
"{% endif %}"
689+
"{% for message in messages %}"
690+
"{% if message.content is string %}"
691+
"{% set content = message.content %}"
692+
"{% else %}"
693+
"{% set content = '' %}"
694+
"{% endif %}"
695+
"{% if (message.role == 'user') or (message.role == 'system' and not loop.first) %}"
696+
"{{ '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>\\n' }}"
697+
"{% elif message.role == 'assistant' %}"
698+
"{% set reasoning_content = '' %}"
699+
"{% if message.reasoning_content is string %}"
700+
"{% set reasoning_content = message.reasoning_content %}"
701+
"{% else %}"
702+
"{% if '</think>' in content %}"
703+
"{% set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}"
704+
"{% set content = content.split('</think>')[-1].lstrip('\\n') %}"
705+
"{% endif %}"
706+
"{% endif %}"
707+
"{% if loop.index0 > ns.last_query_index %}"
708+
"{% if loop.last or (not loop.last and reasoning_content) %}"
709+
"{{ '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}"
710+
"{% else %}"
711+
"{{ '<|im_start|>' + message.role + '\\n' + content }}"
712+
"{% endif %}"
713+
"{% else %}"
714+
"{{ '<|im_start|>' + message.role + '\\n' + content }}"
715+
"{% endif %}"
716+
"{{ '<|im_end|>\\n' }}"
717+
"{% endif %}"
718+
"{% endfor %}"
719+
"{% if add_generation_prompt %}"
720+
"{{ '<|im_start|>assistant\\n' }}"
721+
"{% if enable_thinking is defined and enable_thinking is false %}"
722+
"{{ '<think>\\n\\n</think>\\n\\n' }}"
723+
"{% endif %}"
724+
"{% endif %}"
725+
),
663726
}
664727
# flake8: noqa
665728

0 commit comments

Comments
 (0)