You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

17 lines
722 B

# -*- coding: utf-8 -*-
import pytest
@pytest.mark.parametrize('text, text_seq_length, bpe_dropout', [
    ('hello, how are you?', 128, 0.1),
    ('hello, how are you?', 128, 0.5),
    ('hello, how are you?', 128, 1.0),
    ('hello ... how are you ?', 256, 1.0),
    ('a person standing at a table with bottles of win', 64, 0.5),
    ('привет как дела???', 76, 0.0),
    ('клип на русском языке :)', 76, 0.1),
])
def test_encode_decode_text_yttm(yttm_tokenizer, text, text_seq_length, bpe_dropout):
    """Check that encoding a string and decoding the result gives back the input.

    Every parametrized case forwards its ``text_seq_length`` and
    ``bpe_dropout`` values to ``encode_text`` and then feeds the returned
    tokens straight into ``decode_text``.

    NOTE(review): ``yttm_tokenizer`` is presumably a pytest fixture defined
    outside this file (e.g. in a conftest) -- confirm.
    """
    # Gather the encoding options once so the encode call reads as a single step.
    encode_options = {
        'text_seq_length': text_seq_length,
        'bpe_dropout': bpe_dropout,
    }
    token_ids = yttm_tokenizer.encode_text(text, **encode_options)
    round_tripped = yttm_tokenizer.decode_text(token_ids)

    # The round trip must reproduce the original text exactly.
    assert text == round_tripped