import pandas as pd import numpy as np from typing import List, Tuple, Literal, Generator from numpy.typing import NDArray import itertools import math import random type Binary = Literal[0] | Literal[1] def get_transitions[A](series: List[A], step=1) -> List[Tuple[A, A]]: cycled_series = series[step:] + series[:step] return list(zip(series, cycled_series)) def transition_matrix[A](transitions: List[Tuple[A, A]]) -> NDArray[np.integer]: element, next_element = zip(*transitions) crosstab = pd.crosstab(element, next_element) return np.matrix(crosstab) def correlation(matrix: NDArray[np.integer]) -> float: if matrix.shape != (2, 2): raise ValueError("The input matrix must be 2x2") main_diagonal_product = matrix[0, 0] * matrix[1, 1] other_diagonal_product = matrix[0, 1] * matrix[1, 0] difference = main_diagonal_product - other_diagonal_product row_sums = matrix.sum(axis=1) col_sums = matrix.sum(axis=0) product_of_sums = np.prod(row_sums) * np.prod(col_sums) sqrt_product_of_sums = np.sqrt(product_of_sums) return difference / sqrt_product_of_sums def correlation_ranges[A](series: List[A]) -> Generator[float, None, None]: step = 0 while True: transitions = get_transitions(series, step=step) matrix = transition_matrix(transitions) current_correlation = correlation(matrix) yield current_correlation step += 1 type MetricalSyllable = Literal["-"] | Literal["u"] sonett = list(14 * ((5 * "u-") + "u")) limerick = list(2 * "u-uu-uu-u" + 2 * "u-uu-" + "u-uu-uu-u") def get_hexameter(k: int = 1): result = [] for _ in range(k): hexameter = list("".join(random.choices(["-uu", "--"], k=5)) + random.choice(["--", "-u"])) result.extend(hexameter) return result mat = transition_matrix(get_transitions(sonett)) print(mat) print(correlation(mat)) mat = transition_matrix(get_transitions(limerick)) print(mat) print(correlation(mat)) mat = transition_matrix(get_transitions(get_hexameter(k=2000))) print(mat) print(correlation(mat)) mat = transition_matrix(get_transitions(random.choices(["-", "u"], k=20000))) print(mat) print(correlation(mat)) print(list(itertools.islice(correlation_ranges(get_hexameter(k=2000)), 40)))