"""German G2P (Grapheme-to-Phoneme) converter.
Grapheme to Phoneme for German language using dictionary lookup
with rule-based fallback.
German Phonology features:
- Final obstruent devoicing (Auslautverhärtung)
- Vowel length distinction
- Umlauts (ä, ö, ü)
- ß (Eszett)
- CH as [ç] or [x] depending on context (ich-Laut vs ach-Laut)
- Voicing assimilation in consonant clusters
- Schwa in unstressed syllables
Reference:
https://en.wikipedia.org/wiki/Standard_German_phonology
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Final
from kokorog2p.base import G2PBase
from kokorog2p.pipeline.tokenizer import RegexTokenizer, SpacyTokenizer
from kokorog2p.token import GToken
from kokorog2p.tokenization import ensure_gtoken_positions
if TYPE_CHECKING:
from kokorog2p.de.lexicon import GermanLexicon
# =============================================================================
# German Phoneme Mappings
# =============================================================================
# Context-free grapheme -> IPA table for German.
# Keys are lowercase graphemes of one to four letters; values are IPA strings.
# Note: Many mappings are context-dependent and handled by rules.
# GermanG2P._word_to_phonemes consults this table longest-grapheme-first and
# applies its own special cases (ch, sp/st, word-final ig, single vowels, s)
# before falling back to the entries below.
IPA: Final[dict[str, str]] = {
    # Vowels - short
    "a": "a",
    "e": "ɛ",
    "i": "ɪ",
    "o": "ɔ",
    "u": "ʊ",
    "ä": "ɛ",
    "ö": "œ",
    "ü": "ʏ",
    "y": "ʏ",
    # Vowels - long (marked with doubling or followed by h/single consonant)
    "aa": "aː",
    "ee": "eː",
    "ie": "iː",
    "oo": "oː",
    "uh": "uː",
    "äh": "ɛː",
    "öh": "øː",
    "üh": "yː",
    "ah": "aː",
    "eh": "eː",
    "ih": "iː",
    "oh": "oː",
    # Diphthongs
    "ei": "aɪ",
    "ai": "aɪ",
    "ey": "aɪ",
    "ay": "aɪ",
    "au": "aʊ",
    "eu": "ɔʏ",
    "äu": "ɔʏ",
    # Consonants
    "b": "b",
    "c": "k",
    "d": "d",
    "f": "f",
    "g": "ɡ",
    "h": "h",
    "j": "j",
    "k": "k",
    "l": "l",
    "m": "m",
    "n": "n",
    "p": "p",
    "q": "k",
    "r": "ʁ",
    "s": "z",  # Default voiced, devoiced in certain contexts
    "t": "t",
    "v": "f",  # Usually [f] in German
    "w": "v",
    "x": "ks",
    "z": "ʦ",
    "ß": "s",
    # Digraphs and trigraphs
    "ch": "x",  # Default ach-Laut, ich-Laut handled by rule
    "ck": "k",
    "dt": "t",
    "ng": "ŋ",
    "nk": "ŋk",
    "ph": "f",
    "pf": "pf",
    "qu": "kv",
    "sch": "ʃ",
    # sp/st: the rule converter intercepts these two digraphs itself and only
    # produces [ʃp]/[ʃt] at the start of a word.
    "sp": "ʃp",  # Word-initial
    "st": "ʃt",  # Word-initial
    "ss": "s",
    "th": "t",
    "tz": "ʦ",
    "tsch": "ʧ",
    "dsch": "ʤ",
    "chs": "ks",
}
# Voiced-unvoiced consonant pairs for final devoicing.
# GermanG2P._apply_final_devoicing replaces every key found after the last
# vowel of a word with its value. Both the ASCII "g" and the IPA "ɡ" (U+0261)
# are listed so either spelling of the phoneme is devoiced.
VOICED_TO_UNVOICED: Final[dict[str, str]] = {
    "b": "p",
    "d": "t",
    "g": "k",
    "ɡ": "k",  # IPA g (U+0261)
    "v": "f",
    "z": "s",
    "ʒ": "ʃ",
}
def _is_front_vowel_context(prev_chars: str) -> bool:
"""Check if the previous character(s) form a front vowel context for ich-Laut."""
prev_lower = prev_chars.lower()
# Check for front vowels and consonants l, n, r
if prev_lower in ("i", "e", "ä", "ö", "ü", "y"):
return True
if prev_lower in ("l", "n", "r"):
return True
# Check for diphthongs ending in front vowel
if prev_lower.endswith(("ei", "ai", "eu", "äu", "ie", "ey", "ay")):
return True
return False
def normalize_to_kokoro(phonemes: str, use_tie_replacement: bool = False) -> str:
    """Rewrite an IPA phoneme string into the Kokoro-compatible form.

    Combining marks are stripped and one small-capital letter is mapped to
    its plain counterpart.

    Args:
        phonemes: IPA phoneme string, possibly containing combining diacritics.
        use_tie_replacement: If True, tie characters (͡) are first turned into
            "^" and the known tied sequences are then collapsed into single
            special phonemes. Default is False.

    Returns:
        The normalized phoneme string. A falsy input is returned unchanged.
    """
    if not phonemes:
        return phonemes

    normalized = phonemes

    if use_tie_replacement:
        # Tie character (U+0361) -> "^", then collapse the known tied pairs.
        normalized = normalized.replace("͡", "^")
        tied_sequences = (
            ("a^ɪ", "I"),
            ("a^ʊ", "W"),
            ("d^z", "ʣ"),
            ("d^ʒ", "ʤ"),
            ("e^ɪ", "A"),
            ("o^ʊ", "O"),
            ("ə^ʊ", "Q"),
            ("s^s", "S"),
            ("t^s", "ʦ"),
            ("t^ʃ", "ʧ"),
            ("ɔ^ɪ", "Y"),
        )
        for tied, symbol in tied_sequences:
            normalized = normalized.replace(tied, symbol)

    # One pass over the string handles the three single-character rewrites.
    single_char_rewrites = {
        0x032F: None,  # COMBINING INVERTED BREVE BELOW (non-syllabic marker)
        0x0329: None,  # COMBINING VERTICAL LINE BELOW (syllabic marker)
        ord("ʏ"): "y",  # LATIN SMALL CAPITAL Y -> lowercase y
    }
    return normalized.translate(single_char_rewrites)
[docs]
class GermanG2P(G2PBase):
"""German G2P converter using dictionary lookup with fallback options.
This class provides grapheme-to-phoneme conversion for German text
using a large dictionary (738k+ entries) with fallback to espeak-ng
or goruut for out-of-vocabulary words and phonological rules.
Example:
>>> g2p = GermanG2P()
>>> tokens = g2p("Guten Tag")
>>> for token in tokens:
... print(f"{token.text} -> {token.phonemes}")
"""
[docs]
def __init__(
    self,
    language: str = "de-de",
    use_espeak_fallback: bool = True,
    use_goruut_fallback: bool = False,
    use_spacy: bool = False,
    spacy_model: str = "de_core_news_sm",
    use_lexicon: bool = True,
    strip_stress: bool = True,
    load_silver: bool = True,
    load_gold: bool = True,
    version: str = "1.0",
    expand_abbreviations: bool = True,
    enable_context_detection: bool = True,
    **kwargs: Any,
) -> None:
    """Set up the German G2P converter.

    Args:
        language: Language code (default: 'de-de').
        use_espeak_fallback: Use espeak for out-of-vocabulary words.
        use_goruut_fallback: Use goruut for out-of-vocabulary words.
        use_spacy: Tokenize with spaCy instead of the regex tokenizer.
            Off by default so no spaCy model is needed unless requested.
        spacy_model: spaCy German model package loaded when use_spacy=True
            (e.g., "de_core_news_sm", "de_core_news_md", "de_core_news_lg").
        use_lexicon: Whether to use dictionary lookup (default: True).
        strip_stress: Whether to remove stress markers from lexicon output.
        load_silver: Forwarded to the lexicon; load the silver tier
            dictionary if available.
        load_gold: Forwarded to the lexicon; load the gold tier dictionary.
        version: Stored on the instance and returned by get_target_model().
        expand_abbreviations: Whether to expand abbreviations
            (Prof. → Professor).
        enable_context_detection: Context-aware abbreviation expansion.
        **kwargs: Accepted for call compatibility; not used here.

    Raises:
        ValueError: If both use_espeak_fallback and use_goruut_fallback
            are True.
    """
    # The two fallback engines are mutually exclusive.
    if use_espeak_fallback and use_goruut_fallback:
        raise ValueError(
            "Cannot use both espeak and goruut fallback simultaneously. "
            "Please set only one of use_espeak_fallback or "
            "use_goruut_fallback to True."
        )

    super().__init__(
        language=language,
        use_espeak_fallback=use_espeak_fallback,
        use_goruut_fallback=use_goruut_fallback,
    )

    # Plain configuration.
    self.version = version
    self.use_spacy = use_spacy
    self.spacy_model = spacy_model
    self._strip_stress = strip_stress

    # Optional components; they stay None when disabled or unavailable.
    self._lexicon: GermanLexicon | None = None
    self._fallback: Any = None

    # spaCy pipeline and tokenizers are created on first property access.
    self._nlp: object | None = None
    self._regex_tokenizer: RegexTokenizer | None = None
    self._spacy_tokenizer: SpacyTokenizer | None = None

    # Text normalizer
    from kokorog2p.de.normalizer import GermanNormalizer

    self._normalizer = GermanNormalizer(
        track_changes=False,
        expand_abbreviations=expand_abbreviations,
        enable_context_detection=enable_context_detection,
    )

    # Dictionary lookup; an ImportError leaves the lexicon unset.
    if use_lexicon:
        try:
            from kokorog2p.de.lexicon import GermanLexicon

            self._lexicon = GermanLexicon(
                strip_stress=strip_stress,
                load_silver=load_silver,
                load_gold=load_gold,
            )
        except ImportError:
            pass

    # Fallback engine, built right away; an ImportError leaves it unset.
    if use_goruut_fallback or use_espeak_fallback:
        try:
            if use_goruut_fallback:
                from kokorog2p.de.fallback import (
                    GermanGoruutFallback as fallback_cls,
                )
            else:
                from kokorog2p.de.fallback import (
                    GermanEspeakFallback as fallback_cls,
                )

            self._fallback = fallback_cls()
        except ImportError:
            pass
@property
def nlp(self) -> object:
    """Return the spaCy pipeline, loading it on first access.

    If the configured model is not an installed package it is downloaded
    first. The pipeline is loaded with only the "tok2vec" and "tagger"
    components enabled and is then cached on the instance.
    """
    if self._nlp is not None:
        return self._nlp

    import spacy

    model_name = self.spacy_model
    if not spacy.util.is_package(model_name):
        spacy.cli.download(model_name)  # type: ignore[attr-defined]
    self._nlp = spacy.load(model_name, enable=["tok2vec", "tagger"])
    return self._nlp
@property
def regex_tokenizer(self) -> RegexTokenizer:
    """Return the regex tokenizer, creating and caching it on first access."""
    if self._regex_tokenizer is not None:
        return self._regex_tokenizer

    tokenizer = RegexTokenizer(
        track_positions=True,
        use_bracket_matching=True,
        lang=self.language,
    )
    self._regex_tokenizer = tokenizer
    return tokenizer
@property
def spacy_tokenizer(self) -> SpacyTokenizer:
    """Return the spaCy tokenizer, creating and caching it on first access.

    Building it reads ``self.nlp``, which loads the spaCy pipeline if needed.
    """
    if self._spacy_tokenizer is not None:
        return self._spacy_tokenizer

    tokenizer = SpacyTokenizer(
        nlp=self.nlp,
        track_positions=True,
        use_bracket_matching=True,
        lang=self.language,
    )
    self._spacy_tokenizer = tokenizer
    return tokenizer
[docs]
def __call__(self, text: str) -> list[GToken]:
"""Convert text to a list of tokens with phonemes.
Args:
text: Input text to convert.
Returns:
List of GToken objects with phonemes assigned.
"""
if not text or not text.strip():
return []
# Normalize text (expand abbreviations, normalize quotes, etc.)
text = self._normalizer(text)
tokens = (
self._tokenize_spacy(text)
if self.use_spacy
else self._tokenize_simple(text)
)
for token in tokens:
word = token.text
# Handle punctuation
if not any(c.isalnum() for c in word):
token.phonemes = self._get_punct_phonemes(word)
token.set("rating", 4)
continue
# Try lexicon first
phonemes = None
if self._lexicon:
phonemes = self._lexicon.lookup(word, token.tag)
if phonemes:
token.phonemes = normalize_to_kokoro(phonemes)
token.set("rating", 5) # Dictionary lookup = highest rating
# Fallback to espeak or goruut
if not phonemes and self._fallback:
fallback_result = self._fallback(word)
phonemes = fallback_result[0]
if phonemes:
token.phonemes = phonemes
token.set("rating", 3) # Fallback
# Fallback to rules
if not phonemes:
phonemes = self._word_to_phonemes(word)
if phonemes:
token.phonemes = normalize_to_kokoro(phonemes)
token.set("rating", 2) # Rule-based
if not phonemes:
token.phonemes = "?"
token.set("rating", 0)
ensure_gtoken_positions(tokens, text)
return tokens
def _tokenize_spacy(self, text: str) -> list[GToken]:
"""Tokenize text using spaCy."""
processing_tokens = self.spacy_tokenizer.tokenize(text)
return [ptoken.to_gtoken() for ptoken in processing_tokens]
def _tokenize_simple(self, text: str) -> list[GToken]:
"""Tokenize text using regex tokenizer."""
processing_tokens = self.regex_tokenizer.tokenize(text)
return [ptoken.to_gtoken() for ptoken in processing_tokens]
def _word_to_phonemes(self, word: str) -> str:
    """Convert a single word to phonemes using German spelling rules.

    The lowercased word is scanned left to right. At each position the
    longest matching grapheme (4, 3, then 2 letters) is tried first, with
    special cases for ``ch``, word-initial ``sp``/``st`` and word-final
    ``ig``; otherwise a single letter is converted. Final obstruent
    devoicing is applied to the result.

    Args:
        word: Word to convert.

    Returns:
        Phoneme string in IPA. Non-alphabetic characters are dropped, so
        the result may be empty.
    """
    text = word.lower()
    result: list[str] = []
    i = 0
    n = len(text)

    while i < n:
        matched = False

        # Try to match multi-character sequences first (longest match)
        for length in (4, 3, 2):
            if i + length > n:
                continue
            chunk = text[i : i + length]

            # 'ch': ich-Laut [ç] vs ach-Laut [x]
            if chunk == "ch":
                if i == 0:
                    # Word-initial ch (loan words) -> [ç]
                    result.append("ç")
                else:
                    # Look at the previous letter AND the previous two
                    # letters: the diphthongs "eu"/"äu" end in "u" but
                    # still take the ich-Laut (euch, feucht, Bäuche).
                    context = text[max(0, i - 2) : i]
                    if _is_front_vowel_context(
                        context[-1]
                    ) or _is_front_vowel_context(context):
                        result.append("ç")
                    else:
                        # After back vowels a, o, u, au -> ach-Laut [x]
                        result.append("x")
                i += 2
                matched = True
                break

            # 'sp' / 'st': only word-initial -> [ʃp] / [ʃt]
            if chunk in ("sp", "st"):
                if i == 0:
                    result.append("ʃ")
                    result.append(chunk[1])
                    i += 2
                    matched = True
                # Elsewhere the two letters are converted one by one, so
                # the table entries for "sp"/"st" must not be used here.
                break

            # Word-final 'ig' -> [ɪç]
            if chunk == "ig" and i + 2 == n:
                result.append("ɪ")
                result.append("ç")
                i += 2
                matched = True
                break

            # Plain table lookup
            if chunk in IPA:
                result.append(IPA[chunk])
                i += length
                matched = True
                break

        if matched:
            continue

        # Single character
        char = text[i]

        if char in "aeiouäöü":
            # Vowel quality/length depends on what follows.
            result.append(self._get_vowel_phoneme(text, i))
        elif char == "s":
            # Word-final or before p/t/k -> [s]; otherwise [z]
            if i == n - 1 or text[i + 1] in "ptk":
                result.append("s")
            else:
                result.append("z")
        elif char in IPA:
            # Includes 'v' -> [f] (loan words with [v] are not detected)
            # and 'r' -> [ʁ] (vocalized [ɐ] is not modelled).
            result.append(IPA[char])
        elif char.isalpha():
            # Unknown letter: keep as-is
            result.append(char)
        # Non-alphabetic characters are skipped.
        i += 1

    # Apply final devoicing
    result = self._apply_final_devoicing(result)
    return "".join(result)
def _get_vowel_phoneme(self, text: str, pos: int) -> str:
"""Determine the correct vowel phoneme based on context.
German vowel length is determined by syllable structure:
- Long in open syllables (single consonant + vowel follows)
- Long before single consonant at word end in many cases
- Short before consonant clusters
- Schwa (ə) in unstressed endings like -e, -en, -el, -er
Args:
text: The full word text.
pos: Position of the vowel in the word.
Returns:
IPA phoneme for the vowel.
"""
char = text[pos]
# Short vowel mappings
short_vowels: dict[str, str] = {
"a": "a",
"e": "ɛ",
"i": "ɪ",
"o": "ɔ",
"u": "ʊ",
"ä": "ɛ",
"ö": "œ",
"ü": "ʏ",
}
# Long vowel mappings
long_vowels: dict[str, str] = {
"a": "aː",
"e": "eː",
"i": "iː",
"o": "oː",
"u": "uː",
"ä": "ɛː",
"ö": "øː",
"ü": "yː",
}
# Check what follows
remaining = text[pos + 1 :]
# Special handling for 'e' - check for schwa
if char == "e":
# Word-final -e -> schwa
if not remaining:
return "ə"
# -en, -el, -er at word end -> schwa
if remaining in ("n", "l", "r", "m", "ns", "ln", "rn", "ls", "rs"):
return "ə"
# -end, -ent, -ens at word end (but not stressed like 'Trend')
if remaining in ("nd", "nt", "ns") and pos > 0:
return "ə"
# Before 'h' + vowel or word end -> long
if remaining.startswith("h") and (
len(remaining) == 1 or remaining[1:2] in "aeiouäöü"
):
return long_vowels.get(char, short_vowels.get(char, char))
# Word-final vowel -> usually short except in some words
if not remaining:
return short_vowels.get(char, char)
# Before single consonant at word end -> often long
if len(remaining) == 1 and remaining[0] in "bcdfghjklmnpqrstvwxz":
# Common pattern: V + single C at end = long vowel
return long_vowels.get(char, short_vowels.get(char, char))
# Before 'ch', 'ß' -> depends on word, often long
if remaining.startswith(("ch", "ß")):
return long_vowels.get(char, short_vowels.get(char, char))
# Before single consonant + vowel (open syllable) -> long
if (
len(remaining) >= 2
and remaining[0] in "bcdfghjklmnpqrstvwxz"
and remaining[0] not in "ck" # ck indicates short vowel
and remaining[1] in "aeiouäöü"
):
# But 'sch' is one sound, not cluster
if not remaining.startswith("sch"):
return long_vowels.get(char, short_vowels.get(char, char))
# Before consonant cluster -> short
if (
len(remaining) >= 2
and remaining[0] in "bcdfghjklmnpqrstvwxz"
and remaining[1] in "bcdfghjklmnpqrstvwxz"
):
return short_vowels.get(char, char)
# Default to short
return short_vowels.get(char, char)
def _apply_final_devoicing(self, phonemes: list[str]) -> list[str]:
"""Apply German final obstruent devoicing (Auslautverhärtung).
Only applies to the final consonant cluster of the word.
Args:
phonemes: List of phoneme strings.
Returns:
Modified list with final devoicing applied.
"""
if not phonemes:
return phonemes
vowels = frozenset(
[
"a",
"aː",
"e",
"eː",
"ɛ",
"ɛː",
"i",
"iː",
"ɪ",
"o",
"oː",
"ɔ",
"u",
"uː",
"ʊ",
"y",
"yː",
"ʏ",
"ø",
"øː",
"œ",
"ə",
"ɐ",
"aɪ",
"aʊ",
"ɔʏ",
]
)
# Find the last consonant cluster (after the last vowel)
last_vowel_idx = -1
for i in range(len(phonemes) - 1, -1, -1):
if phonemes[i] in vowels:
last_vowel_idx = i
break
# Devoice all voiced obstruents after the last vowel
for i in range(last_vowel_idx + 1, len(phonemes)):
phone = phonemes[i]
if phone in VOICED_TO_UNVOICED:
phonemes[i] = VOICED_TO_UNVOICED[phone]
return phonemes
@staticmethod
def _get_punct_phonemes(text: str) -> str:
"""Get phonemes for punctuation tokens.
Only includes punctuation that exists in the Kokoro vocabulary.
"""
# Punctuation marks that exist in Kokoro vocab
# See kokorog2p/data/kokoro_config.json
puncts = frozenset(';:,.!?"()')
return "".join(c for c in text if c in puncts)
[docs]
def lookup(self, word: str, tag: str | None = None) -> str | None:
    """Return the lexicon's phonemes for ``word``.

    Args:
        word: The word to look up.
        tag: Optional POS tag; accepted but not passed to the lexicon.

    Returns:
        Whatever the lexicon returns for the word, or None when no lexicon
        is loaded.
    """
    lexicon = self._lexicon
    if not lexicon:
        return None
    return lexicon.lookup(word)
[docs]
def phonemize(self, text: str) -> str:
    """Convert ``text`` to one space-separated phoneme string.

    Args:
        text: Input text to convert.

    Returns:
        The phonemes of all tokens joined by single spaces; tokens without
        phonemes are left out.
    """
    per_token = (token.phonemes for token in self(text))
    return " ".join(filter(None, per_token))
[docs]
def get_target_model(self) -> str:
    """Report which Kokoro model variant this G2P instance targets.

    Returns:
        The version string stored on the instance (e.g. "1.1" or "1.0").
    """
    target_version = self.version
    return target_version