quantitative-literaturwissenschaft: import
This commit is contained in:
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,76 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from typing import List, Tuple, Literal, Generator
|
||||
from numpy.typing import NDArray
|
||||
import itertools
|
||||
import math
|
||||
import random
|
||||
|
||||
# Type alias for a binary symbol: the literal integer 0 or the literal integer 1.
type Binary = Literal[0] | Literal[1]
|
||||
|
||||
def get_transitions[A](series: List[A], step=1) -> List[Tuple[A, A]]:
|
||||
cycled_series = series[step:] + series[:step]
|
||||
return list(zip(series, cycled_series))
|
||||
|
||||
def transition_matrix[A](transitions: List[Tuple[A, A]]) -> NDArray[np.integer]:
|
||||
element, next_element = zip(*transitions)
|
||||
crosstab = pd.crosstab(element, next_element)
|
||||
return np.matrix(crosstab)
|
||||
|
||||
|
||||
def correlation(matrix: NDArray[np.integer]) -> float:
    """Return the phi coefficient of a 2x2 table of counts.

    The value is ``(n00 * n11 - n01 * n10)`` divided by the square root of
    the product of both row sums and both column sums.

    Args:
        matrix: A 2x2 table of counts (``np.ndarray``, ``np.matrix`` or
            anything ``np.asarray`` can convert).

    Returns:
        The coefficient as a NumPy float; for non-negative counts it lies in
        ``[-1, 1]``. NaN is returned when any row or column sums to zero,
        because the coefficient is undefined in that case.

    Raises:
        ValueError: If ``matrix`` is not 2x2.
    """
    # Work in float64: the product of the four marginal sums easily exceeds
    # the int64 range for large counts and would silently wrap around.
    counts = np.asarray(matrix, dtype=np.float64)
    if counts.shape != (2, 2):
        raise ValueError("The input matrix must be 2x2")

    determinant = counts[0, 0] * counts[1, 1] - counts[0, 1] * counts[1, 0]
    marginal_product = np.prod(counts.sum(axis=1)) * np.prod(counts.sum(axis=0))

    if marginal_product == 0:
        # An empty row or column makes the determinant zero as well (0 / 0).
        return np.float64(np.nan)
    return determinant / np.sqrt(marginal_product)
|
||||
|
||||
def correlation_ranges[A](series: List[A]) -> Generator[float, None, None]:
|
||||
step = 0
|
||||
while True:
|
||||
transitions = get_transitions(series, step=step)
|
||||
matrix = transition_matrix(transitions)
|
||||
current_correlation = correlation(matrix)
|
||||
yield current_correlation
|
||||
step += 1
|
||||
|
||||
type MetricalSyllable = Literal["-"] | Literal["u"]
|
||||
|
||||
sonett = list(14 * ((5 * "u-") + "u"))
|
||||
|
||||
limerick = list(2 * "u-uu-uu-u" + 2 * "u-uu-" + "u-uu-uu-u")
|
||||
|
||||
def get_hexameter(k: int = 1):
    """Return ``k`` randomly generated hexameter lines as one flat list.

    Each line consists of five feet drawn independently from ``"-uu"`` and
    ``"--"``, followed by a closing foot drawn from ``"--"`` and ``"-u"``.
    The lines are concatenated and split into single-character positions.
    """
    syllables = []
    for _ in range(k):
        # Draw the five body feet first, then the closing foot, so the
        # sequence of random calls per line stays fixed.
        feet = random.choices(["-uu", "--"], k=5)
        feet.append(random.choice(["--", "-u"]))
        syllables += "".join(feet)
    return syllables
|
||||
|
||||
# Demo: print the transition matrix and its correlation for a sonnet pattern,
# a limerick pattern, 2000 random hexameter lines and 20000 uniformly random
# syllables, in that order.
for _sample in (
    sonett,
    limerick,
    get_hexameter(k=2000),
    random.choices(["-", "u"], k=20000),
):
    mat = transition_matrix(get_transitions(_sample))
    print(mat)
    print(correlation(mat))

# First 40 lagged correlations (lags 0..39) of a fresh random hexameter sample.
print(list(itertools.islice(correlation_ranges(get_hexameter(k=2000)), 40)))
|
||||
Reference in New Issue
Block a user