243 lines
53 KiB
Plaintext
243 lines
53 KiB
Plaintext
|
|
{
|
||
|
|
"cells": [
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 60,
|
||
|
|
"id": "6330d681-02fc-4faa-8fc8-2df53e8207b4",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"import pandas as pd\n",
|
||
|
|
"import numpy as np\n",
|
||
|
|
"from typing import List, Tuple, Literal, Generator, TypeVar\n",
|
||
|
|
"from numpy.typing import NDArray\n",
|
||
|
|
"import itertools\n",
|
||
|
|
"import math\n",
|
||
|
|
"import matplotlib.pyplot as plt\n",
|
||
|
|
"import random"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 9,
|
||
|
|
"id": "794067ab-a647-4229-8fbd-70eb932f30b5",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
A = TypeVar("A")


def get_transitions(series: List[A], step=1) -> List[Tuple[A, A]]:
    """Pair every element with the element ``step`` positions further on.

    The series is treated as cyclic: positions past the end wrap around to
    the beginning, so the result always has ``len(series)`` pairs.

    Args:
        series: The sequence of symbols to pair up.
        step: Cyclic offset between the two members of each pair. Any
            integer is accepted; it is reduced modulo ``len(series)``.

    Returns:
        A list of ``(element, element_step_later)`` tuples, one per input
        position. An empty series yields an empty list.
    """
    if not series:
        return []
    # Reduce the offset first: plain slicing does not wrap, so an offset
    # beyond the series length would silently behave like an offset of 0.
    offset = step % len(series)
    cycled_series = series[offset:] + series[:offset]
    return list(zip(series, cycled_series))
|
||
|
|
"\n",
|
||
|
def transition_matrix(transitions: List[Tuple[A, A]]) -> NDArray[np.integer]:
    """Count how often each symbol is paired with each other symbol.

    Args:
        transitions: ``(element, next_element)`` pairs, e.g. the output of
            ``get_transitions``.

    Returns:
        The cross-tabulation of first members (rows) against second members
        (columns) as a ``numpy.matrix`` of counts, in the label order
        produced by ``pandas.crosstab``.

    Raises:
        ValueError: If ``transitions`` is empty.
    """
    if not transitions:
        # Unpacking ``zip(*[])`` would fail with an opaque
        # "not enough values to unpack" message; say what is wrong instead.
        raise ValueError("Cannot build a transition matrix from no transitions")
    element, next_element = zip(*transitions)
    crosstab = pd.crosstab(element, next_element)
    return np.matrix(crosstab)
|
||
|
|
"\n",
|
||
|
def correlation(matrix: NDArray[np.integer]) -> float:
    """Return the phi coefficient of a 2x2 contingency table.

    The value is ``(a*d - b*c) / sqrt(r0 * r1 * c0 * c1)`` where ``a, b, c, d``
    are the four cells in row-major order and ``r``/``c`` are the row and
    column totals.

    Args:
        matrix: A 2x2 table of counts.

    Returns:
        The coefficient as a float, or ``nan`` when the product of the row
        and column totals is zero (the quotient is undefined then).

    Raises:
        ValueError: If ``matrix`` is not 2x2.
    """
    if matrix.shape != (2, 2):
        raise ValueError("The input matrix must be 2x2")

    # Work on a plain float array: the product of the four marginal totals
    # grows with the fourth power of the sample size and overflows int64 for
    # long series. A plain ndarray also gives 1-D sums for np.matrix input.
    table = np.asarray(matrix, dtype=float)

    difference = table[0, 0] * table[1, 1] - table[0, 1] * table[1, 0]
    product_of_sums = np.prod(table.sum(axis=1)) * np.prod(table.sum(axis=0))

    if product_of_sums == 0:
        # A zero marginal makes the denominator vanish; report "undefined"
        # explicitly instead of dividing by zero.
        return float("nan")

    return float(difference / np.sqrt(product_of_sums))
|
||
|
|
"\n",
|
||
|
def correlation_ranges(series: List[A]) -> Generator[float, None, None]:
    """Lazily yield the correlation of ``series`` with itself at each offset.

    Offsets start at 0 and increase by one without bound, so the caller
    decides how many values to take (for example with ``itertools.islice``).

    Args:
        series: The sequence of symbols to analyse.

    Yields:
        ``correlation(transition_matrix(get_transitions(series, step)))`` for
        ``step = 0, 1, 2, ...``.
    """
    for offset in itertools.count():
        pairs = get_transitions(series, step=offset)
        yield correlation(transition_matrix(pairs))
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 70,
|
||
|
|
"id": "7cc57542-1518-40a0-b79a-fa84628317c2",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
# Fixed patterns, stored as lists with one "u" or "-" character per item.

# Fourteen identical lines: five "u-" pairs followed by a closing "u".
sonnet = list(("u-" * 5 + "u") * 14)

# Five lines: two long, two short, one long.
limerick = list("".join(["u-uu-uu-u"] * 2 + ["u-uu-"] * 2 + ["u-uu-uu-u"]))
|
||
|
|
"\n",
|
||
|
def syllables(k: int = 1):
    """Draw ``k`` marks at random from ``"-"`` and ``"u"``.

    Args:
        k: Number of marks to draw.

    Returns:
        A list of ``k`` one-character strings.
    """
    alphabet = ["-", "u"]
    return random.choices(alphabet, k=k)
|
||
|
|
"\n",
|
||
|
def get_sapphic(k: int = 1):
    """Generate ``k`` stanzas of three 11-mark lines plus one 5-mark line.

    Each long line is ``-u-`` + one random mark + ``-uu-u-`` + one random
    mark; the short closing line is ``-uu-`` + one random mark. The random
    marks come from ``syllables``.

    Args:
        k: Number of stanzas to generate.

    Returns:
        A flat list of ``"-"``/``"u"`` marks for all stanzas in order.
    """
    marks = []
    for _stanza in range(k):
        for _line in range(3):
            marks += [*"-u-", *syllables(), *"-uu-u-", *syllables()]
        marks += [*"-uu-", *syllables()]
    return marks
|
||
|
|
"\n",
|
||
|
|
" \n",
|
||
|
def get_hexameter(k: int = 1):
    """Generate ``k`` random six-foot lines as one flat list of marks.

    The first five feet of a line are each drawn from ``-uu`` and ``--``;
    the sixth is drawn from ``--`` and ``-u``.

    Args:
        k: Number of lines to generate.

    Returns:
        A flat list of ``"-"``/``"u"`` marks for all lines in order.
    """
    marks = []
    for _line in range(k):
        leading_feet = random.choices(["-uu", "--"], k=5)
        closing_foot = random.choice(["--", "-u"])
        # Extending with a string appends its characters one by one.
        marks.extend("".join(leading_feet) + closing_foot)
    return marks
|
||
|
|
"\n",
|
||
|
def get_phalaecean(k: int = 1):
    """Generate ``k`` random 11-mark lines as one flat list of marks.

    Each line is a random two-mark opening (``--``, ``-u`` or ``u-``), the
    fixed middle ``-uu-u-u-`` and one random closing mark.

    Args:
        k: Number of lines to generate.

    Returns:
        A flat list of ``"-"``/``"u"`` marks for all lines in order.
    """
    marks = []
    for _line in range(k):
        opening = random.choice(["--", "-u", "u-"])
        closing = random.choice(["-", "u"])
        marks.extend(opening + "-uu-u-u-" + closing)
    return marks
|
||
|
|
"\n",
|
||
|
def get_elegiac_distich(k: int = 1):
    """Generate ``k`` random two-line couplets as one flat list of marks.

    The first line of each couplet comes from ``get_hexameter()``. The second
    line is ``-`` + (``-`` or ``uu``) + ``-`` + (``-`` or ``uu``) +
    ``-uu-uu`` + one random closing mark.

    Args:
        k: Number of couplets to generate.

    Returns:
        A flat list of ``"-"``/``"u"`` marks for all couplets in order.
    """
    marks = []
    for _couplet in range(k):
        marks.extend(get_hexameter())
        # NOTE(review): this second line has 11-13 marks. The elegiac
        # pentameter is usually described with one more long mark before the
        # fixed second half (12-14 marks) - confirm the pattern is intended.
        first_filler = random.choice(["-", "uu"])
        second_filler = random.choice(["-", "uu"])
        closing = random.choice(["-", "u"])
        marks.extend("-" + first_filler + "-" + second_filler + "-uu-uu" + closing)
    return marks
|
||
|
|
"\n",
|
||
|
def get_shloka(k: int = 1):
    """Generate ``2 * k`` random 16-mark lines as one flat list of marks.

    Each line consists of four random marks, the fixed group ``u--``, one
    random mark, four more random marks, the fixed group ``u-u`` and a final
    random mark.

    Args:
        k: Number of line pairs to generate.

    Returns:
        A flat list of ``"-"``/``"u"`` marks for all lines in order.
    """
    marks = []
    free = ["u", "-"]
    for _line in range(2 * k):
        first_open = "".join(random.choices(free, k=4))
        first_close = random.choice(free)
        second_open = "".join(random.choices(free, k=4))
        second_close = random.choice(free)
        marks.extend(first_open + "u--" + first_close + second_open + "u-u" + second_close)
    return marks
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 112,
|
||
|
|
"id": "22284666-7675-416c-b5a9-42cf0b5463a4",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
# Number of offsets evaluated per metre (offsets 0 through 16).
farthest_range = 16 + 1


def analyze_metre(metre):
    """Return the correlations of ``metre`` for the first ``farthest_range`` offsets.

    Args:
        metre: A list of marks accepted by ``correlation_ranges``.

    Returns:
        A ``pandas.Series`` whose position ``i`` holds the correlation at
        offset ``i``.
    """
    return pd.Series(itertools.islice(correlation_ranges(metre), farthest_range))


# One column per metre; every column has ``farthest_range`` rows.
df = pd.DataFrame()
df["Hexameters"] = analyze_metre(get_hexameter(k=700))
df["Limerick"] = analyze_metre(limerick)
df["Distich"] = analyze_metre(get_elegiac_distich(k=400))
df["Phalaecean"] = analyze_metre(get_phalaecean(k=20))
df["Sonnet"] = analyze_metre(sonnet)
# NOTE(review): despite its label this series also appends 20 hexameter lines
# and 3 Sapphic stanzas to the 14 sonnets - confirm the label is intended.
df["14 Sonnets"] = analyze_metre(14 * sonnet + get_hexameter(k=20) + get_sapphic(k=3))
df["Shloka"] = analyze_metre(get_shloka(k=50))
df["Sapphic"] = analyze_metre(get_sapphic(k=8))
df["Random"] = analyze_metre(syllables(k=400))

# A series of "-" marks containing a single "u".
lol = list("------------------------------------------------------u----------------------------")
df["lol"] = analyze_metre(lol)
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 108,
|
||
|
|
"id": "63275c65-ed29-452e-ac1a-7e038fbda2cf",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/plain": [
|
||
|
|
"matrix([[81, 1],\n",
|
||
|
|
" [ 1, 0]])"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 108,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
# Show the pair counts of ``lol`` at offset 10 as the cell result.
transition_matrix(get_transitions(lol, step=10))
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 113,
|
||
|
|
"id": "1e8f3480-243f-4726-8f69-43ed6ca388d8",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stderr",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"/run/user/1000/ipykernel_238674/3830853690.py:19: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
|
||
|
|
" ax.annotate(txt, (summary_df[\"range\"][i], summary_df[\"strength\"][i]))\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlEAAAHFCAYAAADSY6wWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAACG5ElEQVR4nOzdd1gUV9sG8HupS0dQiog0G4gNiTWKRhFssfeI2GKJXWONgiUau9FEjUbBlqhRQ8BeMSp2xRIQFTUYRVFQQBQpe74//NjXleIuLLLo/buuvXTPnJl5zmHYfThzZkYihBAgIiIiIpVolXQARERERKURkygiIiKiQmASRURERFQITKKIiIiICoFJFBEREVEhMIkiIiIiKgQmUURERESFwCSKiIiIqBCYRBEREREVwkeRRAUHB0MikUAqleLff//NtbxZs2Zwd3cvgcjUw9/fH46Ojgpljo6O8Pf3/6Bx3Lt3DxKJBMHBwe+tGx0djb59+8LZ2RlSqRRly5aFh4cHRowYgZSUlOIPVkM0a9YMEokEvr6+uZbl9OeiRYtKIDL1kEgkCAwMlL8PDw+HRCJBeHj4B40jMDAQEonkvfX8/f0hkUhgYmKCFy9e5Fr+77//QktLK1e7SrOkpCT07NkTVlZWkEgk6NixY751c45XiUQCLS0tmJiYoFKlSujWrRt27NgBmUyWa528PosuX74MLy8vmJmZQSKRYNmyZQCAI0eOwNPTE0ZGRpBIJAgJCVFfQ9Vs7ty5KsWX0285LzMzMzRr1gx79uwpthgfPnyIwMBAREZG5lqm7O+EMnK+Y/N6TZgwQS37KK10SjoAdXr9+jW+++47bNq0qaRDKXZ//vknTE1NSzqMPF2+fBmNGzeGq6srZsyYAUdHRzx9+hRXrlzB1q1bMWHCBI2NvbgcOHAAR48exRdffFHSoRQrDw8PnD59Gm5ubiUdSr50dXWRlZWFbdu2YeDAgQrLgoKCYGJi8lEl+rNnz8aff/6J9evXw8XFBRYWFgXWd3Z2xpYtWwAAaWlpuHv3LkJCQtCtWzc0adIEYWFhMDMzk9fP67NowIABSEtLw9atW1GmTBk4OjpCCIHu3bujSpUqCA0NhZGREapWrar+BqvJ3Llz0bVr1wKTznd17doV48ePh0wmw507dzBnzhy0b98eYWFhaNu2rdpjfPjwIWbOnAlHR0fUrl1bYdmgQYPy/OOtKIKCglCtWjWFsvLly6t1H6XNR5VE+fr64rfffsOECRNQq1atYtvPq1evYGBgUGzbV0adOnVKdP8FWbZsGbS0tBAeHg4TExN5edeuXTF79mx8ao9rrFKlCrKysjBx4kScP39ebX8dviszMxMSiQQ6OiX3a21qaooGDRqU2P6Voaenh/bt22P9+vUKSZQQAsHBwejRowfWrl1bghGq1/Xr1+Hi4oI+ffooVd/AwCDXz3DQoEEICgrCgAED8PXXX2Pbtm3yZXl9Fl2/fh2DBw9G69at5WUPHjxAUlISOnXqhBYtWhSyNYo04Zh/m7W1tbzvGjVqhIYNG6JSpUpYtmxZsSRRBalQoQIqVKig1m26u7vD09NTrdsEgJcvX8LQ0FDt2/0QPorTeTkmTpwIS0tLTJo06b1109PTMWXKFDg5OUFPTw92dnb45ptv8Pz5c4V6jo6OaNeuHXbt2oU6depAKpVi5syZ8tMWv/32GyZNmgRbW1sYGxujffv2ePz4MVJTU/H111+jbNmyKFu2LPr375/r9MHPP/+Mpk2bwsrKCkZGRqhRowYWLFiAzMzM98b/7hD628Pw777ePv326NEjDBkyBBUqVICenh6cnJwwc+ZMZGVlKWz/4cOH6N69O0xMTGBmZoYePXrg0aNH740LABITE2FqagpjY+M8l7+dROR3WrJZs2Zo1qyZ/L06+vtdY8aMgZGRUZ6jDj169IC1tbX8Z3H06FE0a9YMlpaWMDAwQMWKFdGlSxe8fPnyvf2hq6
uL77//HhcvXlT48snP9evX0aFDB5QpUwZSqRS1a9fGhg0bFOrk9MemTZswfvx42NnZQV9fH7dv34a/vz+MjY1x48YN+Pj4wMjICLa2tvjhhx8AAGfOnMHnn38OIyMjVKlSJde2nzx5guHDh8PNzQ3GxsawsrLCF198gRMnTrw39ndP5+Wcsszv9bbDhw+jRYsWMDU1haGhIRo3bowjR47k2seePXtQu3Zt6Ovrw8nJqVCnQwcMGICIiAjExMQo7P/ff/9F//7981xH2d+dmTNnon79+rCwsICpqSk8PDywbt26XH885Hy27N+/Hx4eHjAwMEC1atWwfv16pdqQlJSE4cOHw87ODnp6enB2dsa0adPw+vVrAP/r+8OHDyM6Olre54U91dq/f3+0adMGf/zxh8K0ibd/h3NO+2RlZWHVqlXyfQYGBsq/0CdNmgSJRKIwReHWrVvo3bs3rKysoK+vD1dXV/z8888K+y/omAeUO35yTnH9888/6NWrF8zMzGBtbY0BAwYgOTlZXk8ikSAtLQ0bNmyQt+HtzyNlubi4oFy5cgr9FRoaioYNG8LQ0BAmJibw9vbG6dOnc637vj4JDw/HZ599BuDNz+btvn67re/67bff0LBhQxgbG8PY2Bi1a9fGunXrVG5bXpRpW05cly5dQteuXVGmTBm4uLgA+N/vxO7du1GnTh0YGBjA1dUVu3fvBvDm+HJ1dYWRkRHq1auHCxcuKGz7zp076NmzJ8qXLw99fX1YW1ujRYsWeZ7uVBvxEQgKChIAxPnz58WPP/4oAIgjR47Il3t5eYnq1avL38tkMuHj4yN0dHTE9OnTxcGDB8WiRYuEkZGRqFOnjkhPT5fXdXBwELa2tsLZ2VmsX79eHDt2TJw7d04cO3ZMABAODg7C399f7N+/X6xevVoYGxuL5s2bC29vbzFhwgRx8OBBMX/+fKGtrS1GjhypEPfYsWPFqlWrxP79+8XRo0fF0qVLRdmyZUX//v0V6vXr1084ODgolDk4OIh+/frJ3//zzz/i9OnTCq+WLVsKbW1tERERIYQQIj4+Xtjb2wsHBwfxyy+/iMOHD4vZs2cLfX194e/vL9/Wy5cvhaurqzAzMxMrVqwQBw4cEKNGjRIVK1YUAERQUFCBP485c+YIAKJXr14iPDxcvHz5Mt+677Yjh5eXl/Dy8pK/V0d/v+vKlSsCgFi7dq1C+bNnz4S+vr4YN26cEEKIu3fvCqlUKry9vUVISIgIDw8XW7ZsEX379hXPnj0rcB85x55MJhN169YVLi4uIiMjQ75dAGLhwoXy+jdu3BAmJibCxcVFbNy4UezZs0f06tVLABDz58/P1R92dnaia9euIjQ0VOzevVskJiaKfv36CT09PeHq6ip+/PFHcejQIdG/f38BQEyZMkVUqVJFrFu3Thw4cEC0a9dOABAXLlxQiGHYsGFi69atIjw8XOzevVsMHDhQaGlpiWPHjim0D4AICAjIFVdOvfT09FzHZWhoqDA1NRWurq7y9TZt2iQkEono2LGj2LVrlwgLCxPt2rUT2tra4vDhw/J6hw8fFtra2uLzzz8Xu3btEn/88Yf47LPP5Mfm+/Tr108YGRkJmUwmHBwcxMSJE+XLevToIZo2bSqePHmSq13K/u4IIYS/v79Yt26dOHTokDh06JCYPXu2MDAwEDNnzlSo5+DgICpUqCDc3NzExo0bxYEDB0S3bt0EAHH8+PEC2/Hq1StRs2ZNYWRkJBYtWiQOHjwopk+fLnR0dESbNm0U+r5OnTrC2dlZ3v/Jycn5bvfdz8p3rV69WgAQmzZtUmhHzu9wQkKCOH36tAAgunbtKt/n/fv3xa5duwQAMXLkSHH69Glx6dIlIcSbzy4zMzNRo0YNsXHjRnHw4EExfvx4oaWlJQIDA+X7KeiYV/b4CQgIEABE1apVxYwZM8ShQ4fEkiVLhL6+vsLn7unTp4WBgYFo06aNvA3//PNPgT8TAOKbb75RKEtKSh
JaWlqiUaNGQgghtmzZIgCIVq1aiZCQELFt2zZRt25doaenJ06cOCFfT5k+SU5Oln/3fffddwp9/XZb3zZ9+nQBQHT
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 640x480 with 1 Axes>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "display_data"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
# Summarise every column of ``df`` by two figures computed from its absolute
# values.
# NOTE(review): 'strength' (sum / rows) equals the column mean and 'range' is
# that same mean divided by a constant, so the two figures are proportional
# and the points fall on a straight line - confirm this is intended.
summary_df = pd.DataFrame({
    'strength': df.abs().sum() / len(df),
    'range': df.abs().mean() / (len(df) / 2),
})

# Scatter plot with 'range' on the x axis and 'strength' on the y axis.
fig, ax = plt.subplots()
ax.scatter(summary_df["range"], summary_df["strength"])

# Label every point with its column name. Iterating the values directly
# avoids looking them up with integer keys, which pandas deprecates for a
# Series that has a non-integer index.
for txt, x, y in zip(summary_df.index, summary_df["range"], summary_df["strength"]):
    ax.annotate(txt, (x, y))

# Axis labels, title and display.
# NOTE(review): the title speaks of normalized sum vs. mean while the axes
# show strength and range - confirm the wording.
plt.ylabel('Stärke der metrischen Bindung')
plt.xlabel('Reichweite der metrischen Bindung')
plt.title('Normalized Sum vs Normalized Mean of Different Poetic Forms')
plt.show()
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"kernelspec": {
|
||
|
|
"display_name": "Python 3 (ipykernel)",
|
||
|
|
"language": "python",
|
||
|
|
"name": "python3"
|
||
|
|
},
|
||
|
|
"language_info": {
|
||
|
|
"codemirror_mode": {
|
||
|
|
"name": "ipython",
|
||
|
|
"version": 3
|
||
|
|
},
|
||
|
|
"file_extension": ".py",
|
||
|
|
"mimetype": "text/x-python",
|
||
|
|
"name": "python",
|
||
|
|
"nbconvert_exporter": "python",
|
||
|
|
"pygments_lexer": "ipython3",
|
||
|
|
"version": "3.11.9"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"nbformat": 4,
|
||
|
|
"nbformat_minor": 5
|
||
|
|
}
|