quantitative-literaturwissenschaft: import

This commit is contained in:
2026-03-14 07:18:15 +01:00
parent 9c57ea69f2
commit 832b13a694
3 changed files with 423 additions and 46 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,76 @@
import pandas as pd
import numpy as np
from typing import List, Tuple, Literal, Generator
from numpy.typing import NDArray
import itertools
import math
import random
# Type alias for a binary symbol: exactly the literal integer 0 or 1.
type Binary = Literal[0] | Literal[1]
def get_transitions[A](series: List[A], step=1) -> List[Tuple[A, A]]:
cycled_series = series[step:] + series[:step]
return list(zip(series, cycled_series))
def transition_matrix[A](transitions: List[Tuple[A, A]]) -> NDArray[np.integer]:
element, next_element = zip(*transitions)
crosstab = pd.crosstab(element, next_element)
return np.matrix(crosstab)
def correlation(matrix: NDArray[np.integer]) -> float:
    """Return the phi coefficient of a 2x2 contingency table.

    For the table ``[[a, b], [c, d]]`` this is
    ``(a*d - b*c) / sqrt((a+b) * (c+d) * (a+c) * (b+d))``.

    Args:
        matrix: A 2x2 array of counts.

    Returns:
        The coefficient as a float. ``nan`` is returned when a row or column
        sum is zero, because the coefficient is undefined in that case.

    Raises:
        ValueError: If ``matrix`` is not of shape ``(2, 2)``.
    """
    if matrix.shape != (2, 2):
        raise ValueError("The input matrix must be 2x2")
    # Convert to Python floats before multiplying: the product of the four
    # marginal sums overflows NumPy's fixed-width integers silently once the
    # counts reach roughly 1e5 per cell, which corrupts the result.
    a, b = float(matrix[0, 0]), float(matrix[0, 1])
    c, d = float(matrix[1, 0]), float(matrix[1, 1])
    difference = a * d - b * c
    product_of_sums = (a + b) * (c + d) * (a + c) * (b + d)
    # A non-positive product means an empty row/column (or invalid input);
    # report "undefined" instead of dividing by zero.
    if not product_of_sums > 0:
        return math.nan
    return difference / math.sqrt(product_of_sums)
def correlation_ranges[A](series: List[A]) -> Generator[float, None, None]:
step = 0
while True:
transitions = get_transitions(series, step=step)
matrix = transition_matrix(transitions)
current_correlation = correlation(matrix)
yield current_correlation
step += 1
type MetricalSyllable = Literal["-"] | Literal["u"]
sonett = list(14 * ((5 * "u-") + "u"))
limerick = list(2 * "u-uu-uu-u" + 2 * "u-uu-" + "u-uu-uu-u")
def get_hexameter(k: int = 1):
    """Generate ``k`` random hexameter patterns as one flat list of symbols.

    Each pattern consists of five units drawn at random from ``"-uu"`` and
    ``"--"``, followed by one closing unit drawn from ``"--"`` and ``"-u"``.
    The patterns are concatenated without separators.

    Args:
        k: Number of patterns to generate.

    Returns:
        A list of single-character strings, each ``"-"`` or ``"u"``.
    """
    feet = ["-uu", "--"]
    closings = ["--", "-u"]
    syllables = []
    for _ in range(k):
        # Draw the five leading units first, then the closing one, so the
        # sequence of random draws stays the same for a given seed.
        opening = "".join(random.choices(feet, k=5))
        line = opening + random.choice(closings)
        # Extending a list with a string appends its characters one by one.
        syllables += line
    return syllables
# Demo: for each sample series print its transition matrix (step 1) and the
# resulting correlation. The samples are the sonnet pattern, the limerick
# pattern, 2000 random hexameters and 20000 symbols drawn at random.
# The tuple of samples is built before the first print, with the random
# draws in the same order as when each sample is created just before use.
for mat in (
    transition_matrix(get_transitions(sample))
    for sample in (
        sonett,
        limerick,
        get_hexameter(k=2000),
        random.choices(["-", "u"], k=20000),
    )
):
    print(mat)
    print(correlation(mat))

# Correlations of a fresh 2000-hexameter sample for the first 40 steps (0-39).
print(
    list(
        itertools.islice(
            correlation_ranges(get_hexameter(k=2000)),
            40,
        )
    )
)