o
    0i/                     @   sn   d dl Zd dlZd dlmZ d dlmZ G dd dZG dd dejj	Z
dd	efd
dZdd	efddZdS )    N)
DataLoader)DistributedTypec                   @   s&   e Zd ZdddZdd Zd	d
 ZdS )RegressionDataset      @   Nc                 C   sV   t j|}|| _|j|fdt j| _|| j | |jd|fdt j | _d S )N)sizeg?)scaler   )	nprandomdefault_rnglengthnormalastypefloat32xy)selfabr   seedrng r   `/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/accelerate/test_utils/training.py__init__   s   ,zRegressionDataset.__init__c                 C   s   | j S N)r   )r   r   r   r   __len__   s   zRegressionDataset.__len__c                 C   s   | j | | j| dS )Nr   r   r   )r   ir   r   r   __getitem__    s   zRegressionDataset.__getitem__)r   r   r   N)__name__
__module____qualname__r   r   r   r   r   r   r   r      s    
r   c                       s(   e Zd Zd fdd	Zd	ddZ  ZS )
RegressionModelr   Fc                    sD   t    tjt| | _tjt| | _d| _	d S )NT)
superr   torchnn	Parametertensorfloatr   r   first_batch)r   r   r   double_output	__class__r   r   r   %   s   

zRegressionModel.__init__Nc                 C   s@   | j rtd| jj d| jj d|j  d| _ || j | j S )NzModel dtype: z, z. Input dtype: F)r*   printr   dtyper   )r   r   r   r   r   forward+   s   $zRegressionModel.forward)r   r   Fr   )r    r!   r"   r   r0   __classcell__r   r   r,   r   r#   $   s    r#      
batch_sizec                    s   ddl m} ddlm} |dddd}|d|d	}|d
 d}dd t|D fdd}|j|dg dd} fdd}	t|d
 d|	dd}
t|d d|	dd}|
|fS )Nr   load_datasetAutoTokenizerzbert-base-cased!tests/test_samples/MRPC/train.csvtests/test_samples/MRPC/dev.csvtrain
validationcsv
data_filesr;   labelc                 S   s   i | ]\}}||qS r   r   ).0r   vr   r   r   
<dictcomp>;   s    z&mocked_dataloaders.<locals>.<dictcomp>c                    s@   | d | d dd dd}d| v r fdd| d D |d	< |S )
N	sentence1	sentence2T
max_length)
truncationrF   paddingr@   c                    s   g | ]} | qS r   r   )rA   l)label_to_idr   r   
<listcomp>C   s    zAmocked_dataloaders.<locals>.tokenize_function.<locals>.<listcomp>labelsr   examplesoutputs)rJ   	tokenizerr   r   tokenize_function=   s   z-mocked_dataloaders.<locals>.tokenize_functionTrD   rE   r@   batchedremove_columnsc                    s.    j tjkrj| ddddS j| dddS )NrF      pt)rH   rF   return_tensorslongest)rH   rX   )distributed_typer   XLApad)rN   acceleratorrP   r   r   
collate_fnM   s   z&mocked_dataloaders.<locals>.collate_fnr   shuffler_   r3   r<   F   )	datasetsr5   transformersr7   from_pretrainedunique	enumeratemapr   )r^   r3   r5   r7   r?   rc   
label_listrQ   tokenized_datasetsr_   train_dataloadereval_dataloaderr   )r^   rJ   rP   r   mocked_dataloaders2   s"   


rm   c                    s   ddl m} ddlm} |dj_ddd}|d|d	}fd
d}   |j|dg dd}W d    n1 s?w   Y   fdd}t	|d d|dd}	t	|d d|dd}
|	|
fS )Nr   r4   r6   zHuggingFaceTB/SmolLM-360Mr8   r9   r:   r=   r>   c                    s    | d dd dd}|S )NrD   TF)rG   rF   return_attention_maskr   rM   )rP   r   r   rQ   d   s   zGmocked_dataloaders_for_autoregressive_models.<locals>.tokenize_functionTrR   rS   c                    s    j tjkrdntdd | D } jdkrd}n
 jdkr!d}nd }j| d|d	 |d
d}|d d d d	d f |d< |d d d d df |d< t|d jkd|d |d< |S )NrV   c                 S   s   g | ]}t |d  qS )	input_ids)len)rA   er   r   r   rK   w   s    zTmocked_dataloaders_for_autoregressive_models.<locals>.collate_fn.<locals>.<listcomp>fp8r2   no   rF   rb   rW   )rH   rF   pad_to_multiple_ofrX   ro   rL   i)	rZ   r   r[   maxmixed_precisionr\   r%   wherepad_token_id)rN   rF   ru   batchr]   r   r   r_   r   s(   

 z@mocked_dataloaders_for_autoregressive_models.<locals>.collate_fnr;   Fr   r`   r<   rb   )
rc   r5   rd   r7   re   	eos_token	pad_tokenmain_process_firstrh   r   )r^   r3   r5   r7   r?   rc   rQ   rj   r_   rk   rl   r   r]   r   ,mocked_dataloaders_for_autoregressive_modelsZ   s$   


r   )r2   )numpyr
   r%   torch.utils.datar   accelerate.utils.dataclassesr   r   r&   Moduler#   intrm   r   r   r   r   r   <module>   s   (