Source code for kokorog2p.backends.espeak.backend

"""High-level espeak backend for Kokoro TTS phonemization.

This module provides a convenient interface for converting text to phonemes
using espeak-ng, with automatic conversion to Kokoro's phoneme format.

Copyright 2024 kokorog2p contributors
Licensed under the Apache License, Version 2.0
"""

import re
from typing import cast

from kokorog2p.backends.espeak.cli_wrapper import CliPhonemizer
from kokorog2p.backends.espeak.phonemizer_base import EspeakPhonemizerBase
from kokorog2p.backends.espeak.wrapper import Phonemizer
from kokorog2p.phonemes import from_espeak


class EspeakBackend:
    """High-level espeak backend for Kokoro TTS phonemization.

    This class provides a simple interface for converting text to phonemes
    using espeak-ng. It automatically converts espeak's IPA output to
    Kokoro's phoneme format.

    Example:
        >>> backend = EspeakBackend("en-us")
        >>> backend.phonemize("hello world")
        'hˈɛlO wˈɜɹld'
    """

    def __init__(
        self,
        language: str = "en-us",
        with_stress: bool = True,
        tie: str = "^",
        use_cli: bool = False,
    ) -> None:
        """Initialize the espeak backend.

        No phonemizer is created here; it is built lazily on first access
        to :attr:`wrapper`.

        Args:
            language: Language code (e.g., "en-us", "en-gb", "fr-fr").
            with_stress: Whether to include stress markers in output.
            tie: Tie character mode. "^" uses tie character for affricates.
            use_cli: If True, force use of CLI phonemizer instead of library.
        """
        self.language = language
        self.with_stress = with_stress
        self.tie = tie
        self.use_cli = use_cli
        self._phonemizer: EspeakPhonemizerBase | None = None

    @property
    def wrapper(self) -> EspeakPhonemizerBase:
        """Get the underlying phonemizer instance (lazy initialization).

        The library-based ``Phonemizer`` is tried first unless ``use_cli``
        is set; any failure while creating it or selecting the voice falls
        back to ``CliPhonemizer``.
        """
        if self._phonemizer is None:
            if self.use_cli:
                self._phonemizer = CliPhonemizer(
                    language=self.language, tie_char=self.tie
                )
            else:
                try:
                    self._phonemizer = Phonemizer()
                    self._phonemizer.set_voice(self.language)
                except Exception:
                    # Best-effort: the library backend is unusable, so
                    # switch to the command-line implementation instead.
                    self._phonemizer = CliPhonemizer(
                        language=self.language, tie_char=self.tie
                    )

        # A phonemizer may exist without a selected voice; try to set it.
        if self._phonemizer is not None and self._phonemizer.voice is None:
            try:
                self._phonemizer.set_voice(self.language)
            except Exception:
                # Only replace a non-CLI phonemizer; a CLI phonemizer that
                # fails here is kept as-is (deliberate best-effort).
                if not isinstance(self._phonemizer, CliPhonemizer):
                    self._phonemizer = CliPhonemizer(
                        language=self.language, tie_char=self.tie
                    )

        return cast(EspeakPhonemizerBase, self._phonemizer)

    @property
    def is_british(self) -> bool:
        """Check if using British English variant."""
        return self.language.lower() in ("en-gb", "en_gb")

    def remove_punctuation(self, text: str) -> str:
        """Remove punctuation from text before phonemization.

        Preserves:
        - Hyphens between letters: "my-world" → "my-world"
        - Apostrophes between letters: "don't" → "don't"
        - Single periods between letters: "Dr. Smith" → "Dr. Smith"
        - Punctuation inside numbers: "3.14" → "3.14", "10:30" → "10:30"

        Removes:
        - Quote marks around words: "'Hello'" → "Hello"
        - Repeated punctuation: "Hello??" → "Hello?"
        - Standalone punctuation: "!" → "", "?" → ""
        - Standalone dots: ".." → ""
        - Ellipsis sequences: "I like this ... . Hello." → "I like this. Hello."
        - Special sequences: "I dont't like you.!" → "I dont't like you."

        Enforces:
        - Single punctuation between words
        - Space after punctuation before a letter: "Hello,world" → "Hello, world"

        Preserves special symbols: @, #, etc.

        Args:
            text: Input text.

        Returns:
            Text with punctuation cleaned.
        """
        # Placeholders for protected characters
        APOS_PROTECT = "__APOS__"
        HYPHEN_PROTECT = "__HYPHEN__"

        # Step 1: Protect apostrophes between letters (contractions)
        text = re.sub(r"(?<=\w)'(?=\w)", APOS_PROTECT, text)

        # Step 2: Protect hyphens between letters (compound words)
        text = re.sub(r"(?<=\w)-(?=\w)", HYPHEN_PROTECT, text)

        # Step 3: Remove quote marks (single and double)
        text = re.sub(r"[\"']", "", text)

        # Step 4: Remove spaces before punctuation
        text = re.sub(r"\s+([.,;:!?])", r"\1", text)

        # Step 5: Collapse repeated punctuation (?!;:, to single)
        text = re.sub(r"([?!;:,])\1+", r"\1", text)

        # Step 6: Collapse dot sequences ("..." becomes "."); a single
        # period between letters (abbreviations) is left alone.
        text = re.sub(r"\.{2,}", ".", text)

        # Step 7: Remove standalone !, ?, ;, :, "," (not attached to words)
        text = re.sub(r"(?<!\w)[?!;:,](?!\w)", "", text)

        # Step 8: Remove standalone dots
        text = re.sub(r"(?<!\w)\.(?!\w)", "", text)

        # Step 9: Clean up multiple spaces
        text = re.sub(r" +", " ", text)

        # Step 10: Enforce space after punctuation when followed by a letter.
        # The lookahead is letters only ([^\W\d_]) rather than \w, so that
        # numeric tokens such as "3.14", "10:30" or "1,000" are not split.
        text = re.sub(r"([.,;:!?])(?=[^\W\d_])", r"\1 ", text)

        # Step 11: Restore protected characters
        text = text.replace(APOS_PROTECT, "'")
        text = text.replace(HYPHEN_PROTECT, "-")

        # Step 12: Strip leading/trailing whitespace
        return text.strip()

    def phonemize(
        self,
        text: str,
        convert_to_kokoro: bool = True,
        remove_punctuation: bool = True,
    ) -> str:
        """Convert text to phonemes.

        Args:
            text: Text to convert to phonemes.
            convert_to_kokoro: If True, convert espeak IPA to Kokoro format.
                If False, return raw espeak IPA output.
            remove_punctuation: If True, remove punctuation before
                phonemization.

        Returns:
            Phoneme string.
        """
        # Use tie character for better handling of affricates (dʒ, tʃ)
        use_tie = self.tie == "^"

        if remove_punctuation:
            text = self.remove_punctuation(text)

        raw_phonemes = self.wrapper.phonemize(text, use_tie=use_tie)

        if convert_to_kokoro:
            return from_espeak(raw_phonemes, british=self.is_british)
        return raw_phonemes

    def phonemize_list(
        self,
        texts: list[str],
        convert_to_kokoro: bool = True,
        remove_punctuation: bool = True,
    ) -> list[str]:
        """Convert multiple texts to phonemes.

        Args:
            texts: List of texts to convert.
            convert_to_kokoro: If True, convert to Kokoro format.
            remove_punctuation: If True, remove punctuation from each text
                before phonemization (forwarded to :meth:`phonemize`).

        Returns:
            List of phoneme strings, one per input text, in input order.
        """
        return [
            self.phonemize(
                text,
                convert_to_kokoro=convert_to_kokoro,
                remove_punctuation=remove_punctuation,
            )
            for text in texts
        ]

    def word_phonemes(
        self,
        word: str,
        convert_to_kokoro: bool = True,
        remove_punctuation: bool = True,
    ) -> str:
        """Convert a single word to phonemes.

        Args:
            word: Word to convert.
            convert_to_kokoro: If True, convert to Kokoro format.
            remove_punctuation: If True, remove punctuation before
                phonemization (forwarded to :meth:`phonemize`).

        Returns:
            Phoneme string for the word (without separators).
        """
        result = self.phonemize(
            word,
            convert_to_kokoro=convert_to_kokoro,
            remove_punctuation=remove_punctuation,
        )
        # Clean up: remove separators and surrounding whitespace
        return result.strip().replace("_", "")

    @property
    def version(self) -> str:
        """Get espeak version as string (e.g., "1.51.1")."""
        return ".".join(str(v) for v in self.wrapper.version)

    def __repr__(self) -> str:
        return f"EspeakBackend(language={self.language!r})"