In the stream loop of the main() function, we can count how many tokens are generated per second and print the result as an it/s (tokens per second) figure. During streaming generation, Python's time module is enough to measure the elapsed time. When testing, keep in mind that the measured speed depends on several factors, including device performance, model size, and input text length.
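The measurement itself is just two timestamps around the streaming loop. As a minimal, self-contained sketch (the token_stream iterator below is a stand-in for a real model stream, not part of the script):

import time

token_stream = iter(["Hello", ",", " world", "!"])  # stand-in for a real token stream

start = time.time()
n_tokens = 0
for tok in token_stream:
    n_tokens += 1
elapsed = time.time() - start
print(f"{n_tokens / max(elapsed, 1e-9):.2f} tokens/s")  # the it/s figure

The full script below applies the same pattern to Baichuan-13B-Chat's streaming chat interface.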
import os
import torch
import platform
from colorama import Fore, Style
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.utils import GenerationConfig
import time


def init_model():
    print("init model ...")
    model = AutoModelForCausalLM.from_pretrained(
        "baichuan-inc/Baichuan-13B-Chat",
        torch_dtype=torch.float16,
        device_map="cuda",
        trust_remote_code=True,
    )
    model.generation_config = GenerationConfig.from_pretrained(
        "baichuan-inc/Baichuan-13B-Chat"
    )
    tokenizer = AutoTokenizer.from_pretrained(
        "baichuan-inc/Baichuan-13B-Chat",
        use_fast=False,
        trust_remote_code=True,
    )
    return model, tokenizer


def clear_screen():
    if platform.system() == "Windows":
        os.system("cls")
    else:
        os.system("clear")
    print(Fore.YELLOW + Style.BRIGHT
          + "Welcome to Baichuan. Type to chat; 'clear' clears the history, "
            "CTRL+C interrupts generation, 'stream' toggles streaming, 'exit' quits.")
    return []


def main(stream=True):
    model, tokenizer = init_model()
    messages = clear_screen()
    while True:
        prompt = input(Fore.GREEN + Style.BRIGHT + "\nUser: " + Style.NORMAL)
        if prompt.strip() == "exit":
            break
        if prompt.strip() == "clear":
            messages = clear_screen()
            continue
        print(Fore.CYAN + Style.BRIGHT + "\nBaichuan: " + Style.NORMAL, end="")
        if prompt.strip() == "stream":
            stream = not stream
            print(Fore.YELLOW + "(streaming generation {})\n".format(
                "enabled" if stream else "disabled"), end="")
            continue
        messages.append({"role": "user", "content": prompt})
        if stream:
            position = 0
            response = ""  # keeps the history append below safe if generation is interrupted immediately
            try:
                start_time = time.time()
                total_tokens = 0
                for response in model.chat(tokenizer, messages, stream=True):
                    # `response` is the cumulative text so far; print only the new suffix.
                    print(response[position:], end="", flush=True)
                    position = len(response)
                    # Re-tokenize the cumulative response; after the final iteration
                    # this holds the total number of generated tokens.
                    total_tokens = len(tokenizer(response, return_tensors="pt")["input_ids"][0])
                    if torch.backends.mps.is_available():
                        torch.mps.empty_cache()  # no-op on CUDA; relevant when running on Apple Silicon
                end_time = time.time()
                elapsed_time = end_time - start_time
                tokens_per_second = total_tokens / elapsed_time
                print(f"\n\nGeneration speed: {tokens_per_second:.2f} tokens/s")
            except KeyboardInterrupt:
                pass
            print()
        else:
            response = model.chat(tokenizer, messages)
            print(response)
            if torch.backends.mps.is_available():
                torch.mps.empty_cache()
        messages.append({"role": "assistant", "content": response})
    print(Style.RESET_ALL)


if __name__ == "__main__":
    main()
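The tokens/s figure above averages over the entire generation, including the prompt-processing (prefill) phase. If you want to separate time-to-first-token from decode throughput, a small wrapper along the lines of the sketch below works with any iterator that yields the cumulative response string, as model.chat(..., stream=True) does here. The measure_stream name is hypothetical, and the helper consumes the stream without printing, so it suits offline benchmarking rather than the interactive loop:

import time


def measure_stream(chat_iter, tokenizer):
    # Hypothetical benchmarking helper, not part of the original script.
    start = time.time()
    first_token_time = None
    final_text = ""
    for final_text in chat_iter:
        if first_token_time is None:
            first_token_time = time.time()  # first chunk arrived: prefill is done
    end = time.time()
    n_tokens = len(tokenizer(final_text, return_tensors="pt")["input_ids"][0])
    ttft = (first_token_time or end) - start
    decode_time = max(end - (first_token_time or start), 1e-9)
    return {
        "ttft_s": ttft,                                    # time to first token
        "tokens": n_tokens,
        "overall_tokens_per_s": n_tokens / max(end - start, 1e-9),
        "decode_tokens_per_s": max(n_tokens - 1, 0) / decode_time,
    }


# Example usage (would replace the manual timing in main()):
# stats = measure_stream(model.chat(tokenizer, messages, stream=True), tokenizer)
# print(f"TTFT {stats['ttft_s']:.2f}s, decode {stats['decode_tokens_per_s']:.2f} tokens/s")

Separating the two numbers matters because prefill cost grows with input length while decode speed is roughly constant, so a single averaged tokens/s figure can look quite different for short and long prompts even on the same hardware.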