通义千问（Qwen）快速开始（其他模型的用法基本相同，具体请参考对应模型的 GitHub 仓库）

from modelscope import AutoModelForCausalLM, AutoTokenizer
from modelscope import GenerationConfig
from transformers import set_seed
def _get_input() -> str:
    while True:
        try:
            message = input('用户 > ').strip()
        except UnicodeDecodeError:  # 输入包含无法解码的字符
            print('[ERROR] Encoding error in input')
            continue
        except KeyboardInterrupt:  # 按下 Ctrl+C 导致 KeyboardInterrupt 异常
            exit(1)
        if message:  # Not NULL
            return message
        print('[ERROR] Query is empty')

# Hub id shared by the tokenizer, the model and the generation config below.
_MODEL_ID = "Qwen/Qwen-1_8B-Chat"

# Tokenizer of the pretrained model, requested at the 'master' revision.
tokenizer = AutoTokenizer.from_pretrained(
    _MODEL_ID,
    revision='master',
    trust_remote_code=True,
)

# Pretrained Qwen 1.8B chat model; device_map="auto" leaves device placement
# to the library. The model is then switched to evaluation mode.
model = AutoModelForCausalLM.from_pretrained(
    _MODEL_ID,
    device_map="auto",
    trust_remote_code=True,
)
model = model.eval()

# Generation settings shipped with the pretrained model (e.g. maximum
# generation length, temperature, top_k sampling).
config = GenerationConfig.from_pretrained(
    _MODEL_ID,
    trust_remote_code=True,
    resume_download=True,
)

# Conversation state used by the chat loop: past (query, response) turns and
# the most recent response text.
history = []
response = ''



# Seed passed to set_seed() before every generation.
seed = 1234

# Chat loop: read a query, stream the model's reply, then record the turn.
while True:
    query = _get_input()
    # The query is handed to the model verbatim. Console commands that could
    # be supported on this input (none of them is implemented in this loop):
    #   :help / :h           Show this help message
    #   :exit / :quit / :q   Exit the demo
    #   :clear / :cl         Clear screen
    #   :clear-his / :clh    Clear history
    #   :history / :his      Show history
    #   :seed                Show current random seed
    #   :seed <N>            Set random seed to <N>
    #   :conf                Show current generation config
    #   :conf <key>=<value>  Change generation config
    #   :reset-conf          Reset generation config

    # Run
    set_seed(seed)
    # Alternative: streaming output could instead be built on
    # TextIteratorStreamer or TextStreamer.
    #
    # Both lines are printed again for every value the stream yields; the
    # value from the last iteration is what gets stored in the history.
    for response in model.chat_stream(tokenizer, query, history=history, generation_config=config):
        print(f"\n用户:{query}")
        print(f"\n千问:{response}")

    history.append((query, response))

代码模块解析

```
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-1_8B-Chat", revision='master',trust_remote_code=True)
```
这行代码从 ModelScope（魔搭）模型库加载指定预训练模型“Qwen/Qwen-1_8B-Chat”的 tokenizer（代码中的 AutoTokenizer 是从 modelscope 导入的，而不是直接从 Hugging Face Model Hub 加载）。AutoTokenizer 是一个自动下载并初始化相应预训练模型所需 tokenizer 类的工具。这里的 revision='master' 表示使用仓库的主分支版本。trust_remote_code=True 表示允许执行模型仓库中附带的远程代码（在某些情况下，模型或 tokenizer 可能包含额外的自定义逻辑）。
```
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-1_8B-Chat", device_map="auto", trust_remote_code=True).eval()
```
这行代码加载了与上述 tokenizer 配套的预训练模型，该模型是为因果语言建模任务准备的（即可以用于聊天、文本生成等）。AutoModelForCausalLM 会根据模型类型自动选择合适的模型类。device_map="auto" 意味着它将根据当前环境自动选择 GPU/CPU 进行模型设备分配。同样地，trust_remote_code=True 表明允许执行远程代码。.eval() 方法将模型设置为评估（推理）模式，即关闭 Dropout 等只在训练时生效的行为；需要注意的是，它本身并不会禁用梯度计算，如需禁用梯度，还要配合 torch.no_grad() 等方式。
```
config = GenerationConfig.from_pretrained("Qwen/Qwen-1_8B-Chat", trust_remote_code=True, resume_download=True)
```
这行代码从预训练模型“Qwen/Qwen-1_8B-Chat”加载生成任务的配置信息，如最大生成长度、温度控制、top_k 采样等。resume_download=True 表示下载过程中断时，能够从中断的地方继续下载。
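model.chat() 返回模型的回答信息，可以直接输出。由于回答是一个字一个字生成的，也可以选择流式输出，这样交互反馈更好：上面的代码使用的就是 model.chat_stream()，它在生成过程中不断返回回答内容，循环结束时最后一次得到的 response 即为完整回答，并被写入 history。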