quantitative-literaturwissenschaft: import
This commit is contained in:
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,76 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from typing import List, Tuple, Literal, Generator
|
||||||
|
from numpy.typing import NDArray
|
||||||
|
import itertools
|
||||||
|
import math
|
||||||
|
import random
|
||||||
|
|
||||||
|
type Binary = Literal[0] | Literal[1]
|
||||||
|
|
||||||
|
def get_transitions[A](series: List[A], step=1) -> List[Tuple[A, A]]:
|
||||||
|
cycled_series = series[step:] + series[:step]
|
||||||
|
return list(zip(series, cycled_series))
|
||||||
|
|
||||||
|
def transition_matrix[A](transitions: List[Tuple[A, A]]) -> NDArray[np.integer]:
|
||||||
|
element, next_element = zip(*transitions)
|
||||||
|
crosstab = pd.crosstab(element, next_element)
|
||||||
|
return np.matrix(crosstab)
|
||||||
|
|
||||||
|
|
||||||
|
def correlation(matrix: NDArray[np.integer]) -> float:
    """Return the phi coefficient of a 2x2 table of counts.

    For the table ``[[a, b], [c, d]]`` this is
    ``(a*d - b*c) / sqrt((a+b) * (c+d) * (a+c) * (b+d))``.

    Args:
        matrix: A 2x2 array (``ndarray`` or ``np.matrix``) of counts.

    Returns:
        The coefficient as a Python float, or NaN when it is undefined
        because a row or column sum is zero.

    Raises:
        ValueError: If *matrix* is not 2x2.
    """
    if matrix.shape != (2, 2):
        raise ValueError("The input matrix must be 2x2")

    # Convert to native Python numbers first: the product of the four
    # marginal sums easily exceeds the int64 range for large counts, and
    # fixed-width NumPy integers would wrap around silently.
    (a, b), (c, d) = np.asarray(matrix).tolist()

    product_of_sums = (a + b) * (c + d) * (a + c) * (b + d)
    if product_of_sums <= 0:
        # A zero marginal makes the coefficient undefined; return NaN
        # explicitly rather than dividing by zero.
        return math.nan

    difference = a * d - b * c
    return difference / math.sqrt(product_of_sums)
|
||||||
|
|
||||||
|
def correlation_ranges[A](series: List[A]) -> Generator[float, None, None]:
|
||||||
|
step = 0
|
||||||
|
while True:
|
||||||
|
transitions = get_transitions(series, step=step)
|
||||||
|
matrix = transition_matrix(transitions)
|
||||||
|
current_correlation = correlation(matrix)
|
||||||
|
yield current_correlation
|
||||||
|
step += 1
|
||||||
|
|
||||||
|
type MetricalSyllable = Literal["-"] | Literal["u"]
|
||||||
|
|
||||||
|
# 14 lines, each made of five "u-" pairs followed by one trailing "u"
# (11 symbols per line), flattened into a single list of symbols.
sonett = list(("u-" * 5 + "u") * 14)
|
||||||
|
|
||||||
|
# Five lines — two long, two short, one long — flattened into one list of
# symbols.
limerick = list(
    "".join(
        [
            "u-uu-uu-u",
            "u-uu-uu-u",
            "u-uu-",
            "u-uu-",
            "u-uu-uu-u",
        ]
    )
)
|
||||||
|
|
||||||
|
def get_hexameter(k: int = 1):
    """Return *k* randomly generated hexameter lines as one flat symbol list.

    Each line consists of five feet, each drawn at random from "-uu" and
    "--", followed by a closing foot drawn at random from "--" and "-u".
    The lines are concatenated without separators; every list item is a
    single "-" or "u" character.
    """
    syllables = []
    for _ in range(k):
        # Draw the five body feet first and the closing foot second, so the
        # random generator is consumed in the same order for every line.
        feet = random.choices(["-uu", "--"], k=5)
        feet.append(random.choice(["--", "-u"]))
        syllables.extend("".join(feet))
    return syllables
|
||||||
|
|
||||||
|
# Demo script: for several symbol sequences, print the table of step-1
# transition counts followed by the correlation computed from that table.

# Fixed sonnet pattern.
mat = transition_matrix(get_transitions(sonett))
print(mat)
print(correlation(mat))

# Fixed limerick pattern.
mat = transition_matrix(get_transitions(limerick))
print(mat)
print(correlation(mat))


# 2000 randomly generated hexameter lines.
mat = transition_matrix(get_transitions(get_hexameter(k=2000)))
print(mat)
print(correlation(mat))

# Baseline: 20000 symbols drawn independently from "-" and "u" with equal
# probability.
mat = transition_matrix(get_transitions(random.choices(["-", "u"], k=20000)))
print(mat)
print(correlation(mat))

# Correlation at steps 0 through 39 for a fresh batch of 2000 random
# hexameter lines; islice bounds the otherwise endless generator.
print(list(itertools.islice(correlation_ranges(get_hexameter(k=2000)), 40)))
|
||||||
Reference in New Issue
Block a user