"""Expression utilities for SOREs and k-OREs.""" import re def sym(s): """Create a simple symbol expression.""" return s def concat(*parts): """Create concatenation expression.""" parts = [p for p in parts if p and p != 'ε'] if not parts: return 'ε' if len(parts) == 1: return parts[0] return '.'.join(parts) def disj(*parts): """Create disjunction expression.""" parts = [p for p in parts if p and p != '∅'] if not parts: return '∅' if len(parts) == 1: return parts[0] return '(' + '|'.join(parts) + ')' def star(expr): """Create iteration expression (one or more, r+).""" if not expr or expr in ('∅', 'ε'): return expr if len(expr) == 1 or (expr.startswith('(') and expr.endswith(')')): return expr + '+' return '(' + expr + ')+' def optional(expr): """Create optional expression (r?).""" if not expr or expr in ('∅', 'ε'): return 'ε' if len(expr) == 1 or (expr.startswith('(') and expr.endswith(')')): return expr + '?' return '(' + expr + ')?' def alphabet(expr): """Return set of alphabet symbols in expression.""" cleaned = re.sub(r'[+?*().|]', ' ', expr) result = set() for token in cleaned.split(): token = token.strip('_0123456789') if token and token not in ('ε', '∅'): result.add(token) return result def strip_k(s): """Remove k-ORE markers: a_1 → a, b^(2) → b.""" result = re.sub(r'_\d+', '', s) result = re.sub(r'\^\(\d+\)', '', result) result = re.sub(r'^\(|\)$', '', result) return result def has_repeats(expr, symbol): """Check if a symbol appears more than once in expression.""" return expr.count(symbol) > 1 def lang_size_at_most(expr, n, alphabet_symbols=None): """Compute |L(r)<=n| — number of words of length ≤ n in L(r).""" if alphabet_symbols is None: alphabet_symbols = alphabet(expr) if not alphabet_symbols: return 1 if 'ε' in expr else 0 size = 0 for length in range(n + 1): size += _count_words(expr, length, alphabet_symbols) return size def _count_words(expr, length, alphabet_symbols): if length < 0: return 0 if not expr or expr == '∅': return 0 if expr == 'ε': return 1 if length == 0 else 0 if expr in alphabet_symbols: return 1 if length == 1 else 0 if '+' in expr: inner = expr.rstrip('+') if inner.endswith('?'): inner = inner[:-1] return _count_star_words(inner, length, alphabet_symbols, 1) if expr.endswith('?'): inner = expr[:-1] return _count_words(inner, length, alphabet_symbols) + (1 if length == 0 else 0) if expr.startswith('(') and '|' in expr: inner = expr[1:-1] parts = _split_disjunction(inner) return sum(_count_words(p, length, alphabet_symbols) for p in parts) if '.' in expr: parts = expr.split('.') return _count_concat_words(parts, length, alphabet_symbols, 0) if ')' in expr or '(' in expr: return 0 return 0 def _count_concat_words(parts, length, alphabet_symbols, idx): if idx >= len(parts): return 1 if length == 0 else 0 total = 0 for take in range(length + 1): cnt = _count_words(parts[idx], take, alphabet_symbols) if cnt > 0: rest = _count_concat_words(parts, length - take, alphabet_symbols, idx + 1) total += cnt * rest return total def _count_star_words(inner, length, alphabet_symbols, min_count): total = 0 for repeat in range(min_count, length + 1): if repeat == 0: continue total += _count_repeat_words(inner, repeat, length, alphabet_symbols) return total def _count_repeat_words(inner, repeat, length, alphabet_symbols): if repeat == 0: return 1 if length == 0 else 0 total = 0 for take in range(length + 1): cnt = _count_words(inner, take, alphabet_symbols) if cnt > 0: rest = _count_repeat_words(inner, repeat - 1, length - take, alphabet_symbols) total += cnt * rest return total def _split_disjunction(s): depth = 0 parts = [] current = [] for ch in s: if ch == '(': depth += 1 current.append(ch) elif ch == ')': depth -= 1 current.append(ch) elif ch == '|' and depth == 0: parts.append(''.join(current)) current = [] else: current.append(ch) parts.append(''.join(current)) return parts