Files
to-hen/quantitative-literaturwissenschaft/Untitled.ipynb

302 lines
189 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "code",
"execution_count": 22,
"id": "6330d681-02fc-4faa-8fc8-2df53e8207b4",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from typing import List, Tuple, Literal, Generator, TypeVar\n",
"from numpy.typing import NDArray\n",
"import itertools\n",
"import math\n",
"import matplotlib.pyplot as plt\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "794067ab-a647-4229-8fbd-70eb932f30b5",
"metadata": {},
"outputs": [],
"source": [
"A = TypeVar(\"A\")\n",
"\n",
"def get_transitions(series: List[A], step=1) -> List[Tuple[A, A]]:\n",
" cycled_series = series[step:] + series[:step]\n",
" return list(zip(series, cycled_series))\n",
"\n",
"def transition_matrix(transitions: List[Tuple[A, A]]) -> NDArray[np.integer]:\n",
" element, next_element = zip(*transitions)\n",
" crosstab = pd.crosstab(element, next_element)\n",
" return np.matrix(crosstab)\n",
"\n",
"def correlation(matrix: NDArray[np.integer]) -> float:\n",
" if matrix.shape != (2, 2):\n",
" raise ValueError(\"The input matrix must be 2x2\")\n",
" main_diagonal_product = np.prod(np.diag(matrix))\n",
" anti_diagonal_product = np.prod(np.diag(np.fliplr(matrix)))\n",
" difference = main_diagonal_product - anti_diagonal_product\n",
" row_products = np.prod(matrix.sum(axis=1))\n",
" col_products = np.prod(matrix.sum(axis=0))\n",
" product_of_sums = row_products * col_products\n",
" sqrt_product_of_sums = np.sqrt(product_of_sums)\n",
" return difference / sqrt_product_of_sums\n",
"\n",
"def correlation_ranges(series: List[A]) -> Generator[float, None, None]:\n",
" step = 0\n",
" while True:\n",
" transitions = get_transitions(series, step=step)\n",
" matrix = transition_matrix(transitions)\n",
" current_correlation = correlation(matrix)\n",
" yield current_correlation\n",
" step += 1"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "7cc57542-1518-40a0-b79a-fa84628317c2",
"metadata": {},
"outputs": [],
"source": [
"sonnet = list(14 * ((5 * \"u-\") + \"u\"))\n",
"\n",
"limerick = list(2 * \"u-uu-uu-u\" + 2 * \"u-uu-\" + \"u-uu-uu-u\")\n",
"\n",
"def syllables(k: int=1):\n",
" return random.choices([\"-\", \"u\"], k=k)\n",
"\n",
"def get_sapphic(k: int = 1):\n",
" result = []\n",
" for _ in range(k):\n",
" for _ in range(3):\n",
" sapphic = list(\"-u-\") + syllables() + list(\"-uu-u-\") + syllables()\n",
" result.extend(sapphic)\n",
" adonic = list(\"-uu-\") + syllables()\n",
" result.extend(adonic)\n",
" return result\n",
"\n",
" \n",
"def get_hexameter(k: int = 1):\n",
" result = []\n",
" for _ in range(k):\n",
" hexameter = list(\"\".join(random.choices([\"-uu\", \"--\"], k=5)) + random.choice([\"--\", \"-u\"]))\n",
" result.extend(hexameter)\n",
" return result\n",
"\n",
"def get_phalaecean(k: int = 1):\n",
" result = []\n",
" for _ in range(k):\n",
" phalaecean = list(random.choice([\"--\", \"-u\", \"u-\"]) + \"-uu-u-u-\" + random.choice([\"-\", \"u\"]))\n",
" result.extend(phalaecean)\n",
" return result\n",
"\n",
"def get_elegiac_distich(k: int = 1):\n",
" result = []\n",
" for _ in range(k):\n",
" result.extend(get_hexameter())\n",
" pentameter = list(\"-\" + random.choice([\"-\", \"uu\"]) + \"-\" + random.choice([\"-\", \"uu\"]) + \"-uu-uu\" + random.choice([\"-\", \"u\"]))\n",
" result.extend(pentameter)\n",
" return result\n",
"\n",
"def get_shloka(k: int = 1):\n",
" result = []\n",
" for _ in range(k * 2):\n",
" shloka = list(\"\".join(random.choices([\"u\", \"-\"], k=4)) + \"u--\" + random.choice([\"u\", \"-\"]) + \"\".join(random.choices([\"u\", \"-\"], k=4)) + \"u-u\" + random.choice([\"u\", \"-\"]))\n",
" result.extend(shloka)\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "22284666-7675-416c-b5a9-42cf0b5463a4",
"metadata": {},
"outputs": [],
"source": [
"farthest_range = 16 + 1\n",
"\n",
"analyze_metre = lambda metre: pd.Series(itertools.islice(correlation_ranges(metre), farthest_range))\n",
"\n",
"df = pd.DataFrame()\n",
"df[\"Hexameters\"] = analyze_metre(get_hexameter(k=700))\n",
"df[\"Limerick\"] = analyze_metre(limerick)\n",
"df[\"Distich\"] = analyze_metre(get_elegiac_distich(k=400))\n",
"df[\"Phalaecean\"] = analyze_metre(get_phalaecean(k=20))\n",
"df[\"Sonnet\"] = analyze_metre(sonnet)\n",
"df[\"14 Sonnets\"] = analyze_metre(14 * sonnet + get_hexameter(k=20) + get_sapphic(k=3))\n",
"df[\"Shloka\"] = analyze_metre(get_shloka(k=50))\n",
"df[\"Sapphic\"] = analyze_metre(get_sapphic(k=8))\n",
"df[\"Random\"] = analyze_metre(syllables(k=400))\n",
"\n",
"lol = list((20 * 3 + 1) * \"u-u-u-u-u-u-u-u\")\n",
"df[\"lol\"] = analyze_metre(lol)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "63275c65-ed29-452e-ac1a-7e038fbda2cf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: >"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjgAAAGdCAYAAAAfTAk2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3gc5bX/PzPbV6tebVnuNsYYY5tqwLQAhpBcIHDhQgLhBkjyI4R2CQk3DdK45IZASG5IJU6nBEJIgIAJPXbophrbuMmyJauX7VPe3x/vzOyumtWlNfN5Hj2SZmdnZ6TdmTPfc873KEIIgYuLi4uLi4vLfoQ62Tvg4uLi4uLi4jLWuAGOi4uLi4uLy36HG+C4uLi4uLi47He4AY6Li4uLi4vLfocb4Li4uLi4uLjsd7gBjouLi4uLi8t+hxvguLi4uLi4uOx3uAGOi4uLi4uLy36Hd7J3YDIwTZM9e/ZQWFiIoiiTvTsuLi4uLi4uQ0AIQU9PD9OnT0dVB9doPpABzp49e6irq5vs3XBxcXFxcXEZAbt27WLGjBmDrvOBDHAKCwsB+QcqKiqa5L1xcXFxcXFxGQrd3d3U1dU51/HB+EAGOHZaqqioyA1wXFxcXFxc8oyhlJe4RcYuLi4uLi4u+x1ugOPi4uLi4uKy3+EGOC4uLi4uLi77HW6A4+Li4uLi4rLf4QY4Li4uLi4uLvsdboDj4uLi4uList/hBjguLi4uLi4u+x1ugOPi4uLi4uKy3+EGOC4uLi4uLi77HeMa4Dz33HN89KMfZfr06SiKwkMPPbTP5zz77LMceuihBINB5s6dy09+8pM+6zzwwAMsXryYQCDA4sWL+fOf/zwOe+/i4uLi4uKSr4xrgBOLxTjkkEP40Y9+NKT1t2/fzoc//GFWrVrF66+/zn//939z1VVX8cADDzjrrF+/nvPPP5+LLrqIN954g4suuojzzjuPF198cbwOw8XFxcXFxSXPUIQQYkJeSFH485//zFlnnTXgOl/84hd5+OGH2bhxo7Pss5/9LG+88Qbr168H4Pzzz6e7u5vHHnvMWee0006jtLSUP/7xj0Pal+7uboqLi+nq6nJnUbm4uLi4uOQJw7l+T6kanPXr13PqqafmLFu9ejWvvPIKmqYNus66desG3G4qlaK7uzvnazy4/95fctEDP+aiB/9vXLbv4uLiMhm8+8897NrYPtm74TJCorrBD3fuZUciNdm7MqFMqQCnqamJ6urqnGXV1dXouk5ra+ug6zQ1NQ243VtuuYXi4mLnq66ubux3HuiMdrG27Gj+WbKMtBWQubi4uOQz3a0Jnv7te6y9+53J3hWXEfLn5g6+va2R720f+Dq5PzKlAhzoOwLdzqBlL+9vncFGp9944410dXU5X7t27RrDPc5wxPJVAMSVAta98NdxeQ0XFxeXiSTakQQg0aNhaOYk743LSNidlDfcu1PpSd6TiWVKBTg1NTV9lJjm5ma8Xi/l5eWDrtNb1ckmEAhQVFSU8zUeLDlkBR6hA7Dl/VfH5TVcXFxcJpJ4d0aNTkQ/WBfI/YU2TV6X2tLGJO/JxDKlApyVK1eydu3anGVPPPEEhx12GD6fb9B1jj766Anbz4HweDxEzBgAUTUxyXvj4uLiMnoSPemsn93Uez7SmpYBTusHrHRiXAOcaDTKhg0b2LBhAyDbwDds2EB9fT0gU0cXX3yxs/5nP/tZdu7cyXXXXcfGjRu5++67+eUvf8n111/vrHP11VfzxBNPcOutt/Lee+9x66238uSTT3LNNdeM56EMmQJdBjbJ8IQ0p7m4uLiMK9kBTrzHVXDyEVvB6dAMjIlpnJ4SjGuA88orr7B8+XKWL18OwHXXXcfy5cv52te+BkBjY6MT7ADMmTOHRx99lGeeeYZly5bxzW9+kzvvvJNzzjnHWefoo4/mnnvu4Ve/+hVLly5lzZo13HvvvRx55JHjeShDpiAtTwCx8JQSx1xcXFxGRLZqk3ADnLykzVJwBDLI+aDgHc+Nn3DCCQxms7NmzZo+y44//nhee+21Qbd77rnncu65545298aFcDINhRALeoimkkQCwcneJRcXF5cRk5Oi6v5gpTj2F1otBUf+rFHhH9dL/5TBlRnGmHBMdhlEfUFe2Pb6JO+Ni4uLy+iI59TguApOvpE2Tbr0jGpjqzkfBNwAZ4wpst48PRSxsf6fk7w3Li4uLqPDTVHlN+29UlLZas7+jhvgjDEVXvkn7aYYo2vzJO+Ni4uLy+jILTJ2U1T5RluvgMZVcFxGzIziYgC6KaJI2zvJe+Pi4uIycgzdJBXPXBBdBSf/aO0V0LgKjsuImTVtBiAVnHJP1yTvjYuLi8vISUZzFRs3wMk/XAXHZcyYXVsDSAWnIBRjV4cb5Li4uOQnjnOxYv+uDdoZ6zL1aE3nBqmuguMyYmZWyhRVUgljhuM8t/3NSd4jFxcXl5Fht4UXVYQAMDQTLfXB8VHZH2iziowrrdZwV8FxGTHlYR+qKd9QUU8B7+59cZL3yMXFxWVk2C3iReVBfAEP4Kap8g1bwTkgLD3Zeqes9mfcAGeMURSFUCoFyDRVrOftSd4jFxcXl5FhBzOhQj+hQp+1zO2kyifsgOaAAjfAcRkDClJJQBYalxhuJ5WLi0t+YgczoUIfoUI/APFuV8HJJ+wuKjvAadcMdPODUUflBjjjQIGj4BRT441ifEDeTC4uLvsXuQqOP2eZS35gKzbzw0G7VpwO/YOh4rgBzjhQZL2huimiMpDk7abGSd4jFxcXl+FjBzPhQj9hN0WVl9gKTlXAS5nPm7Nsf8cNcMaBUkXGyT0UE/HHeWHHO5O8Ry4uLi7DJ95PispVcPKHlGnSY8j5iOU+L+VWgPNBqcNxA5xxoMovc53dFOH1J3l3rzt008XFJf9wU1T5jd0S7lWg2Otxpoi7Co7LiKkqLAKg0ywDoCfpdlK5uLjkH/11UbnzqPIHW6kp83lRFcVRcD4oZn9ugDMOTCuyAhxRCkCQnZO5Oy4uLi7DRksZ6GmZ3nBTVPmJHeDYgU35B8zszw1wxoGZFSUA9CADnTpfjJ6ke9fj4uKSP9iBjMenoigGgQJvznKXqY+dirJTUxVuDY7LaJleUgBAVI0AMMNn8vKuHZO4Ry4uLi7Dw3YxDhSY/OLzl/KPX3wHkAM4Tdf6Ii8YUMFxAxyXkTKjRBYZp1U/KfyU+XTW7XI7qVxcXPIHux3c5+0i3tVJ4+Z3EcJECEjFXEU6HxhIwXGLjF1GTJHfi8eQ86i6KabAH2djy8ZJ3isXFxeXoeOkqLwJAIQwCYRkYBN301R5QR8Fx01RuYwWRVEIW+MaOo1SFFUQTbkKjouLS/5gBziKEneW+QNJ6zFXwckH+ig4bpu4y1hQkJIngNZ0DQBhz06EcPPWLi4u+UGi2w5iYs4yj88OcFwFJx8YSMHp1A20D0AdlRvgjBNFukxRtWlVAFQF2tnTFR/sKS4uLi5TBjsNZepRZ5nqkecwN8DJDxwFxwpsSn0e56Lf/gFIU7kBzjhRYo016zAqAKjzKfxzx5bJ3CUXFxeXIWMHMXqq21mmiJj1mJuiygccBcdKTamK4syj+iDU4bgBzjhR5pWmWF2mNPub7lV4Zc+7k7lLLi4uLkPGDmLSyUyAY5pSzXGLjKc+ScMkas2hshUc+GDV4bgBzjhRGQoBsosKoNSXZlP75sncJRcXF5chYys4yWins8xI98jHut0AZ6pjKzQ+RaHI63GWf5A6qdwAZ5yoKZBmfz1qBCHA79XpTrsBjouLy9RHmIJEVEMInVSsx1luqzluimrq05pVYKwoirPcVXBcRs30YulinPAGMZJSxSn07iStm5O5Wy4uLi77JBXXEaYAM7cxIhXrBNwi43zAnjdV7vfkLHcVHJdRM61IpqgS/gDpeAkA1aEONjV3TOJeubi4uOwbu8bGa/neeP2ypjAZ60YI0w1w8gBbwanw+XKWuwqOy6iZXiTHNSR9fuJxOXSzxmewbud7k7lbLi4uLvvEDmB8fhngRLqjKAIQAkSMdNJA14xJ3EOXfZFRcLw5y10Fx2XUTCsKAKB
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df.plot()"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "1e8f3480-243f-4726-8f69-43ed6ca388d8",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/run/user/1000/ipykernel_121584/3075710450.py:20: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
" ax.annotate(txt, (summary_df[\"range\"][i], summary_df[\"strength\"][i]))\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkUAAAHFCAYAAAD8Jo2EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAACDeElEQVR4nO3dd1gUV9sG8HtZytIRVEBEihXEhljQKBpFsMVYsUTFXmKL3RjFGkvsxhJNBDUae4gaO4pii4oSNSD2jqIgRRQE9nx/+DGv6wLuIsVy/65rL90zZ2aeOQy7D2fOnJEJIQSIiIiIPnM6RR0AERER0YeASRERERERmBQRERERAWBSRERERASASRERERERACZFRERERACYFBEREREBYFJEREREBIBJERERERGATyQpCgoKgkwmg0KhwJ07d9SWN2rUCG5ubkUQWf7w9/eHo6OjSpmjoyP8/f0LNY7bt29DJpMhKCjonXWjoqLQvXt3ODs7Q6FQoHjx4nB3d8eQIUOQlJRU8MF+IBo1agSZTAZfX1+1ZVntOW/evCKILH/IZDJMmTJFeh8aGgqZTIbQ0NBCjWPKlCmQyWTvrOfv7w+ZTAZTU1M8f/5cbfmdO3ego6Ojdlwfs/j4eHTu3BklS5aETCbD119/nWPdrPNVJpNBR0cHpqamKFeuHDp27Iht27ZBqVSqrZPdZ9GFCxfg5eUFc3NzyGQyLFq0CAAQEhICDw8PGBsbQyaTITg4OP8ONJ/9+OOPWsWX1W5ZL3NzczRq1Ah///13gcX48OFDTJkyBREREWrLNP2d0ETWd2x2r9GjR+fLPj4UukUdQH5KS0vDDz/8gPXr1xd1KAXuzz//hJmZWVGHka0LFy6gfv36cHFxweTJk+Ho6IinT5/i33//xaZNmzB69OgPNvaCsn//fhw+fBhffvllUYdSoNzd3XHq1Cm4uroWdSg50tPTQ0ZGBjZv3ow+ffqoLAsMDISpqeknlbhPnz4df/75J9asWYOyZcvC0tIy1/rOzs7YsGEDACAlJQW3bt1CcHAwOnbsiAYNGmDXrl0wNzeX6mf3WdS7d2+kpKRg06ZNKFasGBwdHSGEQKdOnVChQgXs3LkTxsbGqFixYv4fcD758ccf0aFDh1yTyLd16NABo0aNglKpxM2bNzFjxgy0bt0au3btQsuWLfM9xocPH2Lq1KlwdHRE9erVVZb17ds32z/G3kdgYCAqVaqkUlaqVKl83UdR+6SSIl9fX2zcuBGjR49GtWrVCmw/L1++hKGhYYFtXxM1atQo0v3nZtGiRdDR0UFoaChMTU2l8g4dOmD69On43B63V6FCBWRkZGDs2LE4e/Zsvv319rb09HTIZDLo6hbdr7WZmRnq1q1bZPvXhL6+Plq3bo01a9aoJEVCCAQFBcHPzw+rV68uwgjz1+XLl1G2bFl069ZNo/qGhoZqP8O+ffsiMDAQvXv3Rv/+/bF582ZpWXafRZcvX0a/fv3QvHlzqezBgweIj49H27Zt0aRJkzwejaoP4Zx/k7W1tdR29erVg6enJ8qVK4dFixYVSFKUm9KlS6N06dL5uk03Nzd4eHjk6zYB4MWLFzAyMsr37ebFJ3H5LMvYsWNhZWWFcePGvbNuamoqJkyYACcnJ+jr68POzg7ffvstEhISVOo5OjqiVatW2LFjB2rUqAGFQoGpU6dKlwk2btyIcePGwdbWFiYmJmjdujUeP36M5ORk9O/fH8WLF0fx4sXRq1cvte76ZcuWoWHDhihZsiSMjY1RpUoVzJ07F+np6e+M/+0u6ze7vd9+vXm569GjRxgwYABKly4NfX19ODk5YerUqcjIyFDZ/sOHD9GpUyeYmprC3Nwcfn5+ePTo0TvjAoC4uDiYmZnBxMQk2+VvJgU5XQZs1KgRGjVqJL3Pj/Z+24gRI2BsbJxtr4Cfnx+sra2ln8Xhw4fRqFEjWFlZwdDQEGXKlEH79u3x4sWLd7aHnp4eZs6cifDwcJUvk5xcvnwZbdq0QbFixaBQKFC9enWsXbtWpU5We6xfvx6jRo2CnZ0dDAwMcP36dfj7+8PExARXrlyBj48PjI2NYWtri9mzZwMATp8+jS+++ALGxsaoUKGC2rafPHmCwYMHw9XVFSYmJihZsiS+/PJLhIWFvTP2ty+fZV0izOn1pkOHDqFJkyYwMzODkZER6tevj5CQELV9/P3336hevToMDAzg5OSUp8uPvXv3xsmTJxEdHa2y/zt37qBXr17ZrqPp787UqVNRp04dWFpawszMDO7u7vjtt9/U/hjI+mzZt28f3N3dYWhoiEqVKmHNmjUaHUN8fDwGDx4MOzs76Ovrw9nZGRMnTkRaWhqA/7X9oUOHEBUVJbV5Xi9t9urVCy1atMDWrVtVhim8+TucdZklIyMDK1askPY5ZcoU6Qt63LhxkMlkKkMCrl27hq5du6JkyZIwMDCAi4sLli1bprL/3M55QLPzJ+uS0n///YcuXbrA3Nwc1tbW6N27NxITE6V6MpkMKSkpWLt2rXQMb34eaaps2bIoUaKESnvt3LkTnp6eMDIygqmpKby9vXHq1Cm1dd/VJqGhoahVqxaA1z+bN9v6zWN928aNG+Hp6QkTExOYmJigevXq+O2337Q+tuxocmxZcZ0/fx4dOnRAsWLFULZsWQD/+53YvXs3atSoAUNDQ7i4uGD37t0AXp9fLi4uMDY2Ru3atXHu3DmVbd+8eROdO3dGqVKlYGBgAGtrazRp0iTby4s5Ep+AwMBAAUCcPXtWLF68WAAQISEh0nIvLy9RuXJl6b1SqRQ+Pj5CV1dXTJo0SRw4cEDMmzdPGBsbixo1aojU1FSproODg7C1tRXOzs5izZo14siRI+LMmTPiyJEjAoBwcHAQ/v7+Yt++fWLlypXCxMRENG7cWHh7e4vRo0eLAwcOiDlz5gi5XC6GDh2qEvd3330nVqxYIfbt2ycOHz4sFi5cKIoXLy569eqlUq9nz57CwcFBpczBwUH07NlTev/ff/+JU6dOqbyaNm0q5HK5OHnypBBCiJiYGGFvby8cHBzEL7/8Ig4dOiSmT58uDAwMhL+/v7StFy9eCBcXF2Fubi6WLl0q9u/fL4YNGybKlCkjAIjAwMBcfx4zZswQAESXLl1EaGioePHiRY513z6OLF5eXsLLy0t6nx/t/bZ///1XABCrV69WKX/27JkwMDAQI0eOFEIIcevWLaFQKIS3t7cIDg4WoaGhYsOGDaJ79+7i2bNnue4j69xTKpWiZs2aomzZsuLVq1fSdgGIn376Sap/5coVYWpqKsqWLSvWrVsn/v77b9GlSxcBQMyZM0etPezs7ESHDh3Ezp07xe7du0VcXJzo2bOn0NfXFy4uLmLx4sXi4MGDolevXgKAmDBhgqhQoYL47bffxP79+0WrVq0EAHHu3DmVGAYNGiQ2bdokQkNDxe7du0WfPn2Ejo6OOHLkiMrxARABAQFqcWXVS01NVTsvd+7cKczMzISLi4u03vr164VMJhNff/212LFjh9i1a5do1aqVkMvl4tChQ1K9Q4cOCblcLr744guxY8cOsXXrVlGrVi3p3HyXnj17CmNjY6FUKoWDg4MYO3astMzPz080bNhQPHnyRO24NP3dEUIIf39/8dtvv4mDBw+KgwcPiunTpwtDQ0MxdepUlXoODg6idOnSwtXVVaxbt07s379fdOzYUQAQR48ezfU4Xr58KapWrSqMjY3FvHnzxIEDB8SkSZOErq6uaNGihUrb16hRQzg7O0vtn5iYmON23/6sfNvKlSsFALF+/XqV48j6HY6NjRWnTp0SAESHDh2kfd67d0/s2LFDABBDhw4Vp06dEufPnxdCvP7sMjc3F1WqVBHr1q0TBw4cEKNGjRI6OjpiypQp0n5yO+c1PX8CAgIEAFGxYkUxefJkcfDgQbFgwQJhYGCg8rl76tQpYWhoKFq0aCEdw3///ZfrzwSA+Pbbb1XK4uPjhY6OjqhXr54QQogNGzYIAKJZs2YiODhYbN68WdSsWVPo6+uLsLAwaT1N2iQxMVH67vvhhx9U2vrNY33TpEmTBADRrl07sXX
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"def calculate_half_life(series):\n",
" cumulative_sum = np.cumsum(series.abs().values)\n",
" total_sum = cumulative_sum[-1]\n",
" half_sum = total_sum / 2\n",
" half_life_index = np.where(cumulative_sum >= half_sum)[0][0]\n",
" return half_life_index\n",
"\n",
"# Create a DataFrame for the normalized sums and means\n",
"summary_df = pd.DataFrame({\n",
" 'strength': df.abs().sum() / len(df), \n",
" 'range': df.apply(calculate_half_life) / ((len(df) - 1)/2)\n",
"})\n",
"\n",
"# Plot scatter plot with normalized axes\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(summary_df[\"range\"], summary_df[\"strength\"])\n",
"\n",
"# Add labels\n",
"for i, txt in enumerate(summary_df.index):\n",
" ax.annotate(txt, (summary_df[\"range\"][i], summary_df[\"strength\"][i]))\n",
"\n",
"# Display the plot\n",
"plt.ylabel('Stärke der metrischen Bindung')\n",
"plt.xlabel('Reichweite der metrischen Bindung')\n",
"plt.title('Normalized Sum vs Normalized Mean of Different Poetic Forms')\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "3b51d2ab-8d5e-4804-a7c9-5cc18e9e47d7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Transition Matrix:\n",
"[[7 1]\n",
" [1 0]]\n",
"Diagonal Product Difference: (-0.125, -0.125)\n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"def create_transition_matrix(s: str, unique_chars: str) -> np.ndarray:\n",
" n = len(unique_chars)\n",
" transitions = {c: {c_: 0 for c_ in unique_chars} for c in unique_chars}\n",
" \n",
" for i in range(len(s) - 1):\n",
" current_char = s[i]\n",
" next_char = s[i + 1]\n",
" transitions[current_char][next_char] += 1\n",
"\n",
" transition_matrix = np.array([[transitions[c][c_] for c_ in unique_chars] for c in unique_chars])\n",
" \n",
" return transition_matrix\n",
"\n",
" \n",
"def korrelation(matrix: np.ndarray) -> float:\n",
" a = matrix[0, 0]\n",
" b = matrix[0, 1]\n",
" c = matrix[1, 0]\n",
" d = matrix[1, 1]\n",
" return (a*d - b*c) / np.sqrt((a+b)*(a+c)*(b+d)*(c+d))\n",
"\n",
"# Example usage\n",
"s = 'aaaaaaabaa'\n",
"unique_chars = 'ab'\n",
"transition_matrix = create_transition_matrix(s, unique_chars)\n",
"\n",
"dpd = korrelation(transition_matrix), diagonal_product_difference(transition_matrix)\n",
"\n",
"print(f\"Transition Matrix:\\n{transition_matrix}\")\n",
"print(f\"Diagonal Product Difference: {dpd}\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}