quantitative-literaturwissenschaft: import

This commit is contained in:
2026-03-14 07:18:15 +01:00
parent 9c57ea69f2
commit 832b13a694
3 changed files with 423 additions and 46 deletions

View File

@@ -0,0 +1,76 @@
import pandas as pd
import numpy as np
from typing import List, Tuple, Literal, Generator
from numpy.typing import NDArray
import itertools
import math
import random
# Type alias for a binary symbol: either the literal 0 or the literal 1.
type Binary = Literal[0] | Literal[1]
def get_transitions[A](series: List[A], step=1) -> List[Tuple[A, A]]:
cycled_series = series[step:] + series[:step]
return list(zip(series, cycled_series))
def transition_matrix[A](transitions: List[Tuple[A, A]]) -> NDArray[np.integer]:
element, next_element = zip(*transitions)
crosstab = pd.crosstab(element, next_element)
return np.matrix(crosstab)
def correlation(matrix: NDArray[np.integer]) -> float:
    """Return the phi coefficient of a 2x2 contingency table.

    Computes ``(a*d - b*c) / sqrt(r0 * r1 * c0 * c1)`` where ``a, b, c, d``
    are the entries of ``[[a, b], [c, d]]`` and ``r*`` / ``c*`` are the row
    and column sums.

    Args:
        matrix: A 2x2 array-like of counts.

    Returns:
        The correlation as a float, or NaN when any row or column sum is
        zero (the coefficient is undefined in that case).

    Raises:
        ValueError: If ``matrix`` is not 2x2.
    """
    # Work in float64: the product of the four marginal sums grows with the
    # fourth power of the sample size and silently overflows fixed-width
    # numpy integers for large counts.
    counts = np.asarray(matrix, dtype=np.float64)
    if counts.shape != (2, 2):
        raise ValueError("The input matrix must be 2x2")

    determinant = counts[0, 0] * counts[1, 1] - counts[0, 1] * counts[1, 0]
    product_of_sums = np.prod(counts.sum(axis=1)) * np.prod(counts.sum(axis=0))
    if product_of_sums == 0:
        # Undefined: at least one state never occurs as source or target.
        return np.float64(np.nan)
    return determinant / np.sqrt(product_of_sums)
def correlation_ranges[A](series: List[A]) -> Generator[float, None, None]:
step = 0
while True:
transitions = get_transitions(series, step=step)
matrix = transition_matrix(transitions)
current_correlation = correlation(matrix)
yield current_correlation
step += 1
type MetricalSyllable = Literal["-"] | Literal["u"]
sonett = list(14 * ((5 * "u-") + "u"))
limerick = list(2 * "u-uu-uu-u" + 2 * "u-uu-" + "u-uu-uu-u")
def get_hexameter(k: int = 1):
    """Generate ``k`` random hexameter verses as a flat list of marks.

    Each verse consists of five feet drawn at random from ``"-uu"`` and
    ``"--"``, followed by a closing foot drawn from ``"--"`` and ``"-u"``.

    Args:
        k: Number of verses to generate.

    Returns:
        A list of single-character strings (``"-"`` or ``"u"``) for all
        verses concatenated; empty when ``k`` is not positive.
    """
    feet = ["-uu", "--"]
    closing_feet = ["--", "-u"]
    # The five feet are drawn before the closing foot of each verse so the
    # sequence of random draws is stable for a given seed.
    verses = [
        "".join(random.choices(feet, k=5)) + random.choice(closing_feet)
        for _ in range(k)
    ]
    return list("".join(verses))
# Print the lag-1 transition matrix and its correlation for each sample, in
# this order: the sonnet pattern, the limerick pattern, 2000 random hexameter
# verses, and 20000 independent random draws from "-" / "u".
for series in (
    sonett,
    limerick,
    get_hexameter(k=2000),
    random.choices(["-", "u"], k=20000),
):
    mat = transition_matrix(get_transitions(series))
    print(mat)
    print(correlation(mat))

# First 40 values of the lagged correlation for a fresh hexameter sample.
lagged_correlations = correlation_ranges(get_hexameter(k=2000))
print(list(itertools.islice(lagged_correlations, 40)))