Source code for kokorog2p.de.numbers

"""Number-to-words conversion for German G2P.

This module provides functions to convert numbers (digits, decimals, ordinals,
years, currency) into their German word representations for text-to-speech.

Based on the English numbers module, adapted for German language rules.
"""

import re
from collections.abc import Callable

# Ordinal suffixes in German
ORDINALS = frozenset([".", "te", "ter", "tes", "ten", "tem"])

# Currency symbols and their German word forms
CURRENCIES = {
    "€": ("Euro", "Cent"),
    "$": ("Dollar", "Cent"),
    "£": ("Pfund", "Pence"),
    "CHF": ("Franken", "Rappen"),
}


def is_digit(text: str) -> bool:
    """Check if text consists only of digits."""
    return bool(re.match(r"^[0-9]+$", text))


def is_currency_amount(word: str) -> bool:
    """Check if word looks like a currency amount (e.g., '12,99' or '12.99')."""
    # German uses comma as decimal separator
    parts = word.replace(".", "").split(",")
    if len(parts) > 2:
        return False
    return all(is_digit(p) for p in parts if p)


[docs] def number_to_german(n: int) -> str: """Convert an integer to German words. This is a fallback when num2words is not available. Args: n: Integer to convert. Returns: German word representation. """ if n < 0: return "minus " + number_to_german(-n) if n == 0: return "null" ones = [ "", "eins", "zwei", "drei", "vier", "fünf", "sechs", "sieben", "acht", "neun", "zehn", "elf", "zwölf", "dreizehn", "vierzehn", "fünfzehn", "sechzehn", "siebzehn", "achtzehn", "neunzehn", ] tens = [ "", "", "zwanzig", "dreißig", "vierzig", "fünfzig", "sechzig", "siebzig", "achtzig", "neunzig", ] if n < 20: return ones[n] elif n < 100: if n % 10 == 0: return tens[n // 10] elif n % 10 == 1: return "einund" + tens[n // 10] else: return ones[n % 10] + "und" + tens[n // 10] elif n < 1000: if n % 100 == 0: return ones[n // 100] + "hundert" if n // 100 > 1 else "hundert" else: prefix = ones[n // 100] + "hundert" if n // 100 > 1 else "hundert" return prefix + number_to_german(n % 100) elif n < 1000000: thousands = n // 1000 remainder = n % 1000 if thousands == 1: prefix = "eintausend" else: prefix = number_to_german(thousands) + "tausend" if remainder == 0: return prefix return prefix + number_to_german(remainder) elif n < 1000000000: millions = n // 1000000 remainder = n % 1000000 if millions == 1: prefix = "eine Million" else: prefix = number_to_german(millions) + " Millionen" if remainder == 0: return prefix return prefix + " " + number_to_german(remainder) elif n < 1000000000000: billions = n // 1000000000 remainder = n % 1000000000 if billions == 1: prefix = "eine Milliarde" else: prefix = number_to_german(billions) + " Milliarden" if remainder == 0: return prefix return prefix + " " + number_to_german(remainder) else: return str(n)
[docs] def ordinal_to_german(n: int) -> str: """Convert an integer to German ordinal words. Args: n: Integer to convert. Returns: German ordinal word representation. """ if n <= 0: return str(n) + "." # Special cases special = { 1: "erste", 3: "dritte", 7: "siebte", 8: "achte", } if n in special: return special[n] elif n < 20: return number_to_german(n) + "te" else: return number_to_german(n) + "ste"
[docs] class GermanNumberConverter: """Convert numbers to their German word representations. This class handles various number formats including: - Cardinal numbers (1, 2, 3 -> eins, zwei, drei) - Ordinal numbers (1., 2. -> erste, zweite) - Years (1984 -> neunzehnhundertvierundachtzig) - Decimals (3,14 -> drei Komma eins vier) - Currency (12,50€ -> zwölf Euro fünfzig) """
[docs] def __init__( self, lookup_fn: Callable[[str, str | None], str | None] | None = None, ) -> None: """Initialize the German number converter. Args: lookup_fn: Optional function to look up words in the lexicon. """ self.lookup = lookup_fn self._num2words: Callable | None = None
@property def num2words(self) -> Callable: """Lazily import num2words with German language.""" if self._num2words is None: try: from num2words import num2words def german_num2words(n, to="cardinal"): return num2words(n, lang="de", to=to) self._num2words = german_num2words except ImportError: # Fallback to built-in converter def fallback(n, to="cardinal"): if to == "ordinal": return ordinal_to_german(int(n)) return number_to_german(int(n)) self._num2words = fallback return self._num2words
[docs] def convert_cardinal(self, word: str) -> str: """Convert cardinal number to German words. Args: word: Number string (e.g., "42", "1.000"). Returns: German word representation. """ # Remove thousand separators (German uses . for thousands) word = word.replace(".", "") try: return self.num2words(int(word), to="cardinal") except (ValueError, OverflowError): return word
[docs] def convert_ordinal(self, word: str) -> str: """Convert ordinal number to German words. Args: word: Number string (e.g., "1", "42"). Returns: German ordinal word representation. """ word = word.replace(".", "") try: return self.num2words(int(word), to="ordinal") except (ValueError, OverflowError): return word
[docs] def convert_year(self, word: str) -> str: """Convert year to German words. Args: word: Year string (e.g., "1984", "2024"). Returns: German year word representation. """ try: year = int(word) if 1100 <= year <= 1999: # Traditional German year reading: 1984 -> neunzehnhundertvierundachtzig century = year // 100 remainder = year % 100 century_word = self.num2words(century, to="cardinal") if remainder == 0: return century_word + "hundert" remainder_word = self.num2words(remainder, to="cardinal") return century_word + "hundert" + remainder_word else: # Modern years: 2024 -> zweitausendvierundzwanzig return self.num2words(year, to="cardinal") except (ValueError, OverflowError): return word
[docs] def convert_decimal(self, word: str) -> str: """Convert decimal number to German words. German uses comma as decimal separator. Args: word: Decimal string (e.g., "3,14" or "3.14"). Returns: German word representation. """ # Normalize to comma (German style) word = word.replace(".", ",") parts = word.split(",") if len(parts) == 1: return self.convert_cardinal(parts[0]) integer_part = self.convert_cardinal(parts[0]) if parts[0] else "null" # Read decimal digits individually decimal_digits = " ".join( self.num2words(int(d), to="cardinal") for d in parts[1] ) return f"{integer_part} Komma {decimal_digits}"
[docs] def convert_currency(self, word: str, currency: str) -> str: """Convert currency amount to German words. Args: word: Amount string (e.g., "12,50"). currency: Currency symbol (e.g., "€"). Returns: German currency word representation. """ currency_names = CURRENCIES.get(currency, ("", "")) # Normalize decimal separator word = word.replace(".", ",") parts = word.split(",") result_parts = [] # Integer part (euros/dollars/etc.) if parts[0]: int_val = int(parts[0]) int_word = self.num2words(int_val, to="cardinal") unit = currency_names[0] result_parts.append(f"{int_word} {unit}") # Decimal part (cents/etc.) if len(parts) > 1 and parts[1] and int(parts[1]) > 0: dec_val = int(parts[1]) # Ensure two-digit cents if len(parts[1]) == 1: dec_val *= 10 dec_word = self.num2words(dec_val, to="cardinal") unit = currency_names[1] if len(currency_names) > 1 else "" if result_parts: result_parts.append(f"{dec_word} {unit}") else: result_parts.append(f"{dec_word} {unit}") return " ".join(result_parts).strip()
[docs] def convert( self, word: str, currency: str | None = None, is_ordinal: bool = False, is_year: bool = False, ) -> str: """Convert a number to its German word representation. Args: word: The number string to convert. currency: Optional currency symbol (e.g., '€'). is_ordinal: Whether to convert as ordinal. is_year: Whether to convert as year. Returns: German word representation. """ # Handle negative numbers negative = False if word.startswith("-"): negative = True word = word[1:] result = "" if is_ordinal or word.endswith("."): # Ordinal number word = word.rstrip(".") result = self.convert_ordinal(word) elif is_year and len(word) == 4 and is_digit(word): # Year result = self.convert_year(word) elif currency and currency in CURRENCIES: # Currency amount result = self.convert_currency(word, currency) elif "," in word or ("." in word and word.count(".") == 1): # Decimal number result = self.convert_decimal(word) else: # Cardinal number result = self.convert_cardinal(word) if negative: result = "minus " + result return result
[docs] def expand_number(text: str) -> str: """Expand numbers in text to German words. This is a convenience function for simple number expansion. Args: text: Text potentially containing numbers. Returns: Text with numbers expanded to German words. """ converter = GermanNumberConverter() def replace_number(match: re.Match) -> str: word = match.group(0) # Check for currency currency = None if word.endswith("€"): currency = "€" word = word[:-1] elif word.startswith("€"): currency = "€" word = word[1:] # Check for ordinal (ends with .) is_ordinal = word.endswith(".") and is_digit(word.rstrip(".")) # Check for year (4-digit standalone number) is_year = len(word) == 4 and is_digit(word) return converter.convert( word, currency=currency, is_ordinal=is_ordinal, is_year=is_year ) # Match numbers with optional currency symbols pattern = r"€?\-?[\d.,]+€?\.?" return re.sub(pattern, replace_number, text)