Files
to-hen/quantitative-literaturwissenschaft/quantitative_literaturwissenschaft.py

77 lines
2.2 KiB
Python

import pandas as pd
import numpy as np
from typing import List, Tuple, Literal, Generator
from numpy.typing import NDArray
import itertools
import math
import random
# Type alias for a binary symbol: exactly the integer literal 0 or the
# integer literal 1.
type Binary = Literal[0] | Literal[1]
def get_transitions[A](series: List[A], step=1) -> List[Tuple[A, A]]:
cycled_series = series[step:] + series[:step]
return list(zip(series, cycled_series))
def transition_matrix[A](transitions: List[Tuple[A, A]]) -> NDArray[np.integer]:
element, next_element = zip(*transitions)
crosstab = pd.crosstab(element, next_element)
return np.matrix(crosstab)
def correlation(matrix: NDArray[np.integer]) -> float:
    """Return the phi coefficient of a 2x2 contingency table.

    For the table ``[[a, b], [c, d]]`` this is
    ``(a*d - b*c) / sqrt((a+b) * (c+d) * (a+c) * (b+d))``.

    The arithmetic is done in floating point: multiplying the four marginal
    sums in fixed-width NumPy integers silently wraps around once the counts
    reach the order of 1e5 per cell.

    Args:
        matrix: A 2x2 table of counts (``np.ndarray``, ``np.matrix`` or any
            array-like that NumPy can convert to a numeric 2x2 array).

    Returns:
        The coefficient as a float. ``nan`` is returned when any row or
        column sum is zero, because the coefficient is undefined there.

    Raises:
        ValueError: If the input is not 2x2.
    """
    # asarray also strips the np.matrix subclass so tolist() is a plain
    # nested list of Python floats.
    counts = np.asarray(matrix, dtype=float)
    if counts.shape != (2, 2):
        raise ValueError("The input matrix must be 2x2")
    (a, b), (c, d) = counts.tolist()
    marginal_product = (a + b) * (c + d) * (a + c) * (b + d)
    if not marginal_product > 0:
        # Degenerate table (an empty row or column): 0/0 in the formula.
        return math.nan
    return (a * d - b * c) / math.sqrt(marginal_product)
def correlation_ranges[A](series: List[A]) -> Generator[float, None, None]:
step = 0
while True:
transitions = get_transitions(series, step=step)
matrix = transition_matrix(transitions)
current_correlation = correlation(matrix)
yield current_correlation
step += 1
type MetricalSyllable = Literal["-"] | Literal["u"]
sonett = list(14 * ((5 * "u-") + "u"))
limerick = list(2 * "u-uu-uu-u" + 2 * "u-uu-" + "u-uu-uu-u")
def get_hexameter(k: int = 1):
    """Generate ``k`` random verses and return them as one flat symbol list.

    Each verse is five feet drawn independently from ``"-uu"`` and ``"--"``
    followed by a closing foot drawn from ``"--"`` and ``"-u"``. Verses are
    concatenated without separators.

    Args:
        k: Number of verses to generate; values below 1 give an empty list.

    Returns:
        A list of single-character strings, each ``"-"`` or ``"u"``.
    """
    feet = ["-uu", "--"]
    closing_feet = ["--", "-u"]
    # The verse string is built once per outer iteration (five feet first,
    # then the closing foot) and then split into its characters.
    return [
        symbol
        for _ in range(k)
        for symbol in "".join(random.choices(feet, k=5)) + random.choice(closing_feet)
    ]
# Demo: print the step-1 transition matrix and its correlation for the
# sonnet pattern, the limerick pattern, 2000 random hexameter verses and
# 20000 uniformly random symbols, in that order.
for series in (
    sonett,
    limerick,
    get_hexameter(k=2000),
    random.choices(["-", "u"], k=20000),
):
    mat = transition_matrix(get_transitions(series))
    print(mat)
    print(correlation(mat))

# Correlations of a fresh random hexameter text for steps 0 through 39.
shifted_correlations = correlation_ranges(get_hexameter(k=2000))
print(list(itertools.islice(shifted_correlations, 40)))