# -*- coding: utf-8 -*-
import pytest


@pytest.mark.parametrize(
    'text, text_seq_length, bpe_dropout',
    [
        ('hello, how are you?', 128, 0.1),
        ('hello, how are you?', 128, 0.5),
        ('hello, how are you?', 128, 1.0),
        ('hello ... how are you ?', 256, 1.0),
        ('a person standing at a table with bottles of win', 64, 0.5),
        ('привет как дела???', 76, 0.0),
        ('клип на русском языке :)', 76, 0.1),
    ],
)
def test_encode_decode_text_yttm(yttm_tokenizer, text, text_seq_length, bpe_dropout):
    """Check that encoding then decoding a text returns the same text.

    Each case passes ``text`` to ``yttm_tokenizer.encode_text`` together with
    the given ``text_seq_length`` and ``bpe_dropout``, feeds the result to
    ``yttm_tokenizer.decode_text`` and requires the output to equal the input
    string exactly.

    NOTE(review): the cases cover ``bpe_dropout`` values from 0.0 to 1.0 and
    still expect an exact round trip -- presumably dropout only affects how
    the text is segmented, not what it decodes to; confirm against the
    tokenizer implementation.
    """
    encoded = yttm_tokenizer.encode_text(
        text,
        text_seq_length=text_seq_length,
        bpe_dropout=bpe_dropout,
    )
    round_tripped = yttm_tokenizer.decode_text(encoded)
    # The round trip must be lossless for every parametrized case.
    assert text == round_tripped