o
    i                     @   sJ   d dl mZ d dlmZ d dlmZ d dlmZmZ dede	fddZ
d	S )
    )Path)Any)TikTokenConverter)TIKTOKEN_VOCAB_FILETOKENIZER_FILEencoding
output_dirc              
   C   s   t |}|jdd |d t }|t }|jjddd t| }t| }zddlm} ddl	m
} t| tr>|| } || j| W n  tye } zt|}	d|	 v r\td	|td
|d}~ww t|| j| jd }
|
| dS )a  
    Converts given `tiktoken` encoding to `PretrainedTokenizerFast` and saves the configuration of converted tokenizer
    on disk.

    Args:
        encoding (`str` or `tiktoken.Encoding`):
            Tokenizer from `tiktoken` library. If `encoding` is `str`, the tokenizer will be loaded with
            `tiktoken.get_encoding(encoding)`.
        output_dir (`str`):
            Save path for converted tokenizer configuration file.
    T)exist_oktiktoken)parentsr	   r   )get_encoding)dump_tiktoken_bpeblobfilezY`blobfile` is required to save a `tiktoken` file. Install it with `pip install blobfile`.zY`tiktoken` is required to save a `tiktoken` file. Install it with `pip install tiktoken`.N)
vocab_filepatternextra_special_tokens)r   mkdirr   r   parentstrabsoluter
   r   tiktoken.loadr   
isinstance_mergeable_ranksImportErrorlower
ValueErrorr   _pat_str_special_tokens	convertedsave)r   r   	save_filetokenizer_filesave_file_absoluteoutput_file_absoluter   r   e	error_msg	tokenizer r'   d/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/integrations/tiktoken.pyconvert_tiktoken_to_fast   sD   


r)   N)pathlibr   typingr   #transformers.convert_slow_tokenizerr   *transformers.tokenization_utils_tokenizersr   r   r   r)   r'   r'   r'   r(   <module>   s
    