{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "8adcbe0819b88578", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]\n", "Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease\n", "Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease \n", "Get:4 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]\n", "Hit:5 http://archive.ubuntu.com/ubuntu jammy-backports InRelease\n", "Get:6 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [2738 kB]\n", "Get:7 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 Packages [1513 kB]\n", "Fetched 4508 kB in 2s (2961 kB/s) \n", "Reading package lists... Done\n", "Reading package lists... Done\n", "Building dependency tree... Done\n", "Reading state information... Done\n", "graphviz is already the newest version (2.42.2-6ubuntu0.1).\n", "0 upgraded, 0 newly installed, 0 to remove and 121 not upgraded.\n", "Requirement already satisfied: tensorflow in /usr/local/lib/python3.11/dist-packages (2.14.0)\n", "Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (2.0.0)\n", "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (1.6.3)\n", "Requirement already satisfied: flatbuffers>=23.5.26 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (23.5.26)\n", "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (0.5.4)\n", "Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (0.2.0)\n", "Requirement already satisfied: h5py>=2.9.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (3.9.0)\n", "Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.11/dist-packages 
(from tensorflow) (16.0.6)\n", "Requirement already satisfied: ml-dtypes==0.2.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (0.2.0)\n", "Requirement already satisfied: numpy>=1.23.5 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (1.26.0)\n", "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (3.3.0)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from tensorflow) (23.1)\n", "Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (4.24.3)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from tensorflow) (68.2.2)\n", "Requirement already satisfied: six>=1.12.0 in /usr/lib/python3/dist-packages (from tensorflow) (1.16.0)\n", "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (2.3.0)\n", "Requirement already satisfied: typing-extensions>=3.6.6 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (4.8.0)\n", "Requirement already satisfied: wrapt<1.15,>=1.11.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (1.14.1)\n", "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (0.37.1)\n", "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (1.58.0)\n", "Requirement already satisfied: tensorboard<2.15,>=2.14 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (2.14.0)\n", "Requirement already satisfied: tensorflow-estimator<2.15,>=2.14.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (2.14.0)\n", "Requirement already satisfied: keras<2.15,>=2.14.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (2.14.0)\n", "Requirement already satisfied: 
wheel<1.0,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from astunparse>=1.6.0->tensorflow) (0.41.2)\n", "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.11/dist-packages (from tensorboard<2.15,>=2.14->tensorflow) (2.23.1)\n", "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.11/dist-packages (from tensorboard<2.15,>=2.14->tensorflow) (1.0.0)\n", "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.11/dist-packages (from tensorboard<2.15,>=2.14->tensorflow) (3.4.4)\n", "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.11/dist-packages (from tensorboard<2.15,>=2.14->tensorflow) (2.31.0)\n", "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from tensorboard<2.15,>=2.14->tensorflow) (0.7.1)\n", "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from tensorboard<2.15,>=2.14->tensorflow) (2.3.7)\n", "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow) (5.3.1)\n", "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.11/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow) (0.3.0)\n", "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.11/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow) (4.9)\n", "Requirement already satisfied: urllib3>=2.0.5 in /usr/local/lib/python3.11/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow) (2.0.5)\n", "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard<2.15,>=2.14->tensorflow) (1.3.1)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in 
/usr/local/lib/python3.11/dist-packages (from requests<3,>=2.21.0->tensorboard<2.15,>=2.14->tensorflow) (3.2.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.21.0->tensorboard<2.15,>=2.14->tensorflow) (3.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.21.0->tensorboard<2.15,>=2.14->tensorflow) (2023.7.22)\n", "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.11/dist-packages (from werkzeug>=1.0.1->tensorboard<2.15,>=2.14->tensorflow) (2.1.3)\n", "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.11/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow) (0.5.0)\n", "Requirement already satisfied: oauthlib>=3.0.0 in /usr/lib/python3/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard<2.15,>=2.14->tensorflow) (3.2.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (1.26.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (2.2.3)\n", "Requirement already satisfied: numpy>=1.23.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (1.26.0)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas) (2024.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas) (2024.2)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: keras in /usr/local/lib/python3.11/dist-packages (2.14.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (1.5.2)\n", "Requirement already satisfied: numpy>=1.19.5 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.26.0)\n", "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.14.1)\n", "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.4.2)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (3.5.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system 
package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (3.8.0)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.1.1)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (0.11.0)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (4.42.1)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.4.5)\n", "Requirement already satisfied: numpy<2,>=1.21 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.26.0)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (23.1)\n", "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (10.0.1)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (3.2.0)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (2.8.2)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package 
manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (1.4.2)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: pyarrow in /usr/local/lib/python3.11/dist-packages (18.1.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: fastparquet in /usr/local/lib/python3.11/dist-packages (2024.11.0)\n", "Requirement already satisfied: pandas>=1.5.0 in /usr/local/lib/python3.11/dist-packages (from fastparquet) (2.2.3)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from fastparquet) (1.26.0)\n", "Requirement already satisfied: cramjam>=2.3 in /usr/local/lib/python3.11/dist-packages (from fastparquet) (2.9.0)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from fastparquet) (2024.10.0)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from fastparquet) (23.1)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.5.0->fastparquet) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.5.0->fastparquet) (2024.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.5.0->fastparquet) (2024.2)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas>=1.5.0->fastparquet) (1.16.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (1.14.1)\n", "Requirement already satisfied: numpy<2.3,>=1.23.5 in /usr/local/lib/python3.11/dist-packages (from scipy) (1.26.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: seaborn in /usr/local/lib/python3.11/dist-packages (0.13.2)\n", "Requirement already satisfied: numpy!=1.24.0,>=1.20 in /usr/local/lib/python3.11/dist-packages (from seaborn) (1.26.0)\n", "Requirement already satisfied: pandas>=1.2 in /usr/local/lib/python3.11/dist-packages (from seaborn) (2.2.3)\n", "Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in /usr/local/lib/python3.11/dist-packages (from seaborn) (3.8.0)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) 
(1.1.1)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.11.0)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.42.1)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.5)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (23.1)\n", "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (10.0.1)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.2.0)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.2->seaborn) (2024.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.2->seaborn) (2024.2)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.16.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (4.67.1)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: pydot in /usr/local/lib/python3.11/dist-packages (3.0.2)\n", "Requirement already satisfied: pyparsing>=3.0.9 in /usr/local/lib/python3.11/dist-packages (from pydot) (3.2.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: tensorflow-io in /usr/local/lib/python3.11/dist-packages (0.37.1)\n", "Requirement already satisfied: tensorflow-io-gcs-filesystem==0.37.1 in /usr/local/lib/python3.11/dist-packages (from tensorflow-io) (0.37.1)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: tensorflow-addons in /usr/local/lib/python3.11/dist-packages (0.23.0)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from tensorflow-addons) (23.1)\n", "Requirement already satisfied: typeguard<3.0.0,>=2.7 in /usr/local/lib/python3.11/dist-packages (from tensorflow-addons) (2.13.3)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n" ] } ], "source": [ "# from opt_einsum.paths import branch_1\n", "!apt-get update\n", "!apt-get install graphviz -y\n", "\n", "!pip install tensorflow\n", "!pip install numpy\n", "!pip install pandas\n", "\n", "!pip install keras\n", "!pip install scikit-learn\n", "!pip install matplotlib\n", "!pip install joblib\n", "!pip install pyarrow\n", "!pip install fastparquet\n", "!pip install scipy\n", "!pip install seaborn\n", "!pip install tqdm\n", "!pip install pydot\n", "!pip install tensorflow-io\n", "!pip install tensorflow-addons" ] }, { "cell_type": "code", "execution_count": 2, "id": "e6fe6bb613168a8a", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-11-27 13:56:39.957016: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-11-27 13:56:39.957067: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-11-27 13:56:39.957117: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-11-27 13:56:39.966205: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use 
available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "/usr/local/lib/python3.11/dist-packages/tensorflow_addons/utils/tfa_eol_msg.py:23: UserWarning: \n", "\n", "TensorFlow Addons (TFA) has ended development and introduction of new features.\n", "TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.\n", "Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). \n", "\n", "For more information see: https://github.com/tensorflow/addons/issues/2807 \n", "\n", " warnings.warn(\n" ] } ], "source": [ "import tensorflow as tf\n", "from tensorflow.keras.layers import (\n", " Dense, LSTM, MultiHeadAttention, Dropout, BatchNormalization, \n", " LayerNormalization, Input, Activation, Lambda, Bidirectional, \n", " Add, MaxPooling1D, SpatialDropout1D, GlobalAveragePooling1D,\n", " GlobalMaxPooling1D, Concatenate, ThresholdedReLU, Average,\n", " Conv1D, Multiply\n", ")\n", "from tensorflow.keras import regularizers\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau\n", "from tensorflow.keras.optimizers import AdamW\n", "from tensorflow.keras.metrics import AUC\n", "from tensorflow.keras.utils import plot_model\n", "\n", "# Data processing and analysis\n", "import pandas as pd\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import RobustScaler\n", "from sklearn.metrics import (\n", " mean_absolute_error, mean_squared_error, r2_score, \n", " confusion_matrix, classification_report, roc_auc_score\n", ")\n", "\n", "# Visualization\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "# Additional utilities\n", "import tensorflow_addons as tfa\n", "from 
scipy import stats\n", "import json\n", "from datetime import datetime\n", "import os\n", "import joblib\n", "\n", "folder_name = datetime.now().strftime(\"%Y-%m-%d_%H-%M\")\n", "\n", "random_state_value = None" ] }, { "cell_type": "code", "execution_count": 3, "id": "3da8b15c7eb9833f", "metadata": {}, "outputs": [], "source": [ "def get_season(date):\n", " month = date.month\n", " day = date.day\n", " if (month == 12 and day >= 21) or (month <= 3 and day < 20):\n", " return 'Winter'\n", " elif (month == 3 and day >= 20) or (month <= 6 and day < 21):\n", " return 'Spring'\n", " elif (month == 6 and day >= 21) or (month <= 9 and day < 23):\n", " return 'Summer'\n", " elif (month == 9 and day >= 23) or (month <= 12 and day < 21):\n", " return 'Autumn'\n", " else:\n", " return 'Unknown'\n", "\n", "\n", "def get_time_period(hour):\n", " if 5 <= hour < 12:\n", " return 'Morning'\n", " elif 12 <= hour < 17:\n", " return 'Afternoon'\n", " elif 17 <= hour < 21:\n", " return 'Evening'\n", " else:\n", " return 'Night'\n", "\n", "\n", "def add_time_features(df):\n", " df['datetime'] = pd.to_datetime(df['datetime'])\n", " df['timestamp'] = df['datetime'].astype(np.int64) // 10 ** 9\n", " df['year'] = df['datetime'].dt.year\n", " df['month'] = df['datetime'].dt.month\n", " df['day'] = df['datetime'].dt.day\n", " df['hour'] = df['datetime'].dt.hour\n", " df['minute'] = df['datetime'].dt.minute\n", " df['hour_sin'] = np.sin(df['hour'] * (2 * np.pi / 24))\n", " df['hour_cos'] = np.cos(df['hour'] * (2 * np.pi / 24))\n", " df['day_of_week'] = df['datetime'].dt.dayofweek\n", " df['day_of_year'] = df['datetime'].dt.dayofyear\n", " df['week_of_year'] = df['datetime'].dt.isocalendar().week.astype(int)\n", " df['quarter'] = df['datetime'].dt.quarter\n", " df['is_month_end'] = df['datetime'].dt.is_month_end.astype(int)\n", " df['is_quarter_end'] = df['datetime'].dt.is_quarter_end.astype(int)\n", " df['is_year_end'] = df['datetime'].dt.is_year_end.astype(int)\n", " df['month_sin'] = 
np.sin(df['month'] * (2 * np.pi / 12))\n", " df['month_cos'] = np.cos(df['month'] * (2 * np.pi / 12))\n", " df['day_of_year_sin'] = np.sin(df['day_of_year'] * (2 * np.pi / 365.25))\n", " df['day_of_year_cos'] = np.cos(df['day_of_year'] * (2 * np.pi / 365.25))\n", " df['season'] = df['datetime'].apply(get_season)\n", " df['time_period'] = df['hour'].apply(get_time_period)\n", " return df\n", "\n", "\n", "def add_solar_features(df):\n", " # Features based only on radiation and other available variables\n", " df['solar_elevation'] = np.sin(df['day_of_year'] * (2 * np.pi / 365.25)) * np.sin(df['hour'] * (2 * np.pi / 24))\n", "\n", " # Energy-specific features\n", " df['radiation_clearsky'] = df['solarradiation'] * (100 - df['cloudcover']) / 100\n", "\n", " # Temperature impact on theoretical efficiency\n", " df['temp_efficiency_factor'] = 1 - 0.004 * (df['temp'] - 25) # Typical temperature coefficient\n", "\n", " # Combined features\n", " df['cloud_impact'] = df['cloudcover'] * df['solarradiation']\n", " df['visibility_radiation'] = df['visibility'] * df['solarradiation']\n", " df['clear_sky_index'] = (100 - df['cloudcover']) / 100\n", " df['temp_effect'] = df['temp'] - df['tempmin']\n", "\n", " return df\n", "\n", "def add_solar_specific_features(df):\n", " \"\"\"\n", " Aggiunge feature specifiche per la predizione della radiazione solare\n", " combinando caratteristiche astronomiche e meteorologiche\n", " \"\"\"\n", " # Caratteristiche astronomiche\n", " df['day_length'] = 12 + 3 * np.sin(2 * np.pi * (df['day_of_year'] - 81) / 365.25)\n", " df['solar_noon'] = np.abs(12 - df['hour'])\n", " df['solar_elevation'] = np.sin(2 * np.pi * df['day_of_year'] / 365.25) * np.cos(2 * np.pi * df['solar_noon'] / 24)\n", "\n", " # Angolo solare teorico\n", " df['solar_angle'] = np.sin(df['hour_sin']) * np.sin(df['day_of_year_sin'])\n", "\n", " # Interazioni con condizioni atmosferiche\n", " df['cloud_elevation'] = df['cloudcover'] * df['solar_elevation']\n", " 
def add_radiation_energy_features(df):
    """
    Add features derived from ``solarenergy`` and ``uvindex`` to ``df`` in place.

    Reads ``solarenergy``, ``uvindex``, ``cloudcover``, ``temp`` and
    ``datetime`` (must support the ``.dt`` accessor). Rolling, diff and shift
    features are row-based, so rows are presumably expected to be in
    chronological, evenly spaced order -- verify against the caller.

    Returns the same DataFrame object that was passed in.
    """
    energy = df['solarenergy']
    uv = df['uvindex']
    cloud = df['cloudcover']

    # Energy-to-UV ratio; the small constant avoids division by zero.
    df['energy_uv_ratio'] = energy / (uv + 1e-6)

    # Moving averages over several window lengths (in rows).
    for span in (3, 6, 12, 24):
        df[f'energy_rolling_mean_{span}h'] = energy.rolling(window=span).mean()
        df[f'uv_rolling_mean_{span}h'] = uv.rolling(window=span).mean()

    # Per-calendar-day aggregates broadcast back to every row of that day.
    calendar_day = df['datetime'].dt.date
    df['energy_daily_sum'] = energy.groupby(calendar_day).transform('sum')
    df['uv_daily_max'] = uv.groupby(calendar_day).transform('max')

    # Row-to-row changes.
    df['energy_change'] = energy.diff()
    df['uv_change'] = uv.diff()

    # Lagged copies of both signals.
    for offset in (1, 2, 3, 6, 12, 24):
        df[f'energy_lag_{offset}h'] = energy.shift(offset)
        df[f'uv_lag_{offset}h'] = uv.shift(offset)

    # A row is a "peak" when it exceeds its 6-row rolling mean by more than 20%.
    energy_is_peak = energy > df['energy_rolling_mean_6h'] * 1.2
    uv_is_peak = uv > df['uv_rolling_mean_6h'] * 1.2
    df['is_energy_peak'] = energy_is_peak.astype(int)
    df['is_uv_peak'] = uv_is_peak.astype(int)

    # Volatility: rolling standard deviation of the row-to-row changes.
    df['energy_volatility'] = df['energy_change'].rolling(window=24).std()
    df['uv_volatility'] = df['uv_change'].rolling(window=24).std()

    # Composite solar intensity index.
    df['solar_intensity_index'] = (energy * uv) / (cloud + 1e-6)

    # Interaction terms.
    df['uv_cloud_interaction'] = uv * (100 - cloud) / 100
    df['energy_temp_interaction'] = energy * df['temp']

    return df
def calculate_trend(x):
    """
    Return the slope of a least-squares straight line fitted to ``x``.

    The samples are fitted against their positions ``0, 1, ..., len(x) - 1``,
    so the result is the average change per step.

    Parameters
    ----------
    x : sequence of numbers
        The window of values (e.g. the raw array handed over by
        ``Series.rolling(...).apply(..., raw=True)``).

    Returns
    -------
    float
        The fitted slope, or ``np.nan`` when the fit cannot be computed
        (empty or otherwise invalid input, or a failed least-squares solve).
    """
    try:
        return np.polyfit(np.arange(len(x)), x, 1)[0]
    # Only swallow the failures a fit can legitimately produce; a bare
    # ``except`` would also hide KeyboardInterrupt/SystemExit and make a long
    # rolling computation impossible to interrupt.
    except (TypeError, ValueError, np.linalg.LinAlgError):
        return np.nan
def add_advanced_seasonal_features(df):
    """
    Add seasonal features to ``df`` in place and return it.

    Reads ``day_length`` (hours of daylight), ``day_of_year``, ``hour`` and
    ``solar_elevation``; these must already be present on ``df``.

    Columns written:
      * ``day_length_deviation`` -- ``day_length`` minus a 12-hour average day.
      * ``seasonal_intensity``   -- yearly sine wave, phase-shifted by 172 days.
      * ``seasonality_index``    -- ``seasonal_intensity * solar_elevation``.
      * ``daylight_correction``  -- 1 while the hour lies strictly between the
        estimated sunrise and sunset, otherwise 0.
    """
    # Deviation from the average day length.
    avg_day_length = 12
    df['day_length_deviation'] = df['day_length'] - avg_day_length

    # Seasonal intensity.
    # NOTE(review): with this phase the sine is 0 at day 172 and peaks about
    # 91 days later -- confirm that this is the intended seasonal peak.
    df['seasonal_intensity'] = np.sin(2 * np.pi * (df['day_of_year'] - 172) / 365.25)

    # Seasonality index.
    df['seasonality_index'] = df['seasonal_intensity'] * df['solar_elevation']

    # Sunrise/sunset correction. Daylight is modelled as a window of
    # ``day_length`` hours centred on 12:00. The previous test compared the
    # hour against ``day_length`` and ``24 - day_length`` directly, which
    # flagged every hour as dark whenever day_length <= 12.
    half_day = df['day_length'] / 2
    sunrise = 12 - half_day
    sunset = 12 + half_day
    is_dark = (df['hour'] >= sunset) | (df['hour'] <= sunrise)
    df['daylight_correction'] = np.where(is_dark, 0, 1)

    return df
def add_physics_based_conversion_features(df):
    """
    Add features relating ``solarradiation`` to ``solarenergy`` (in place).

    Columns written, in this order: ``radiation_to_energy``,
    ``conversion_efficiency_ratio``, ``energy_integral``,
    ``energy_conversion_gap`` and ``system_performance_ratio``.

    Returns the same DataFrame object that was passed in.
    """
    measured_energy = df['solarenergy']

    # Hourly energy equivalent of the radiation (1 W = 1 J/s = 0.0036 MJ/h).
    # NOTE(review): presumably solarradiation is in W/m^2 and the result in
    # MJ/m^2 per hour -- confirm against the data source.
    theoretical_energy = df['solarradiation'] * 0.0036
    df['radiation_to_energy'] = theoretical_energy

    # Measured vs. theoretical energy; the denominator is floored at 1e-6 so
    # rows with zero radiation do not divide by zero.
    safe_denominator = theoretical_energy.clip(lower=1e-6)
    measured_over_theoretical = measured_energy / safe_denominator
    df['conversion_efficiency_ratio'] = measured_over_theoretical

    # Energy accumulated over the trailing 24 rows.
    df['energy_integral'] = theoretical_energy.rolling(window=24).sum()

    # Gap between theoretical and measured energy.
    df['energy_conversion_gap'] = theoretical_energy - measured_energy

    # System performance index: defined by the same expression as the
    # conversion efficiency ratio above.
    df['system_performance_ratio'] = measured_over_theoretical.copy()

    return df
Feature atmosferiche e di diffusione\n", " df = add_atmospheric_features(df)\n", " df = add_diffusion_features(df)\n", "\n", " # 4. Feature di persistenza e pattern\n", " df = add_persistence_features(df)\n", " df = add_weather_pattern_features(df)\n", "\n", " # 5. Feature di efficienza e stagionalità\n", " df = add_efficiency_features(df)\n", " df = add_advanced_seasonal_features(df)\n", "\n", " # 6. Interazioni e feature derivate\n", " df = add_basic_interactions(df)\n", " df = add_rolling_and_lag_features(df)\n", " df = add_condition_indicators(df)\n", "\n", " # 7. Nuove feature di conversione fisica\n", " df = add_physics_based_conversion_features(df)\n", "\n", " # 8. One-hot encoding delle feature categoriche\n", " df = pd.get_dummies(df, columns=['season', 'time_period'])\n", "\n", " return df\n", "\n", "\n", "def prepare_advanced_data(df):\n", " \"\"\"\n", " Prepare data for advanced modeling with proper datetime handling\n", " \"\"\"\n", " # Assicuriamoci che abbiamo una copia del DataFrame\n", " df = df.copy()\n", "\n", " # Apply feature engineering functions\n", " df = add_advanced_features(df)\n", "\n", " #all_columns = list(df.columns)\n", " #print(all_columns)\n", "\n", " features = {\n", " # Primary Features (strong direct correlation)\n", " 'primary_features': [\n", " 'uvindex',\n", " 'cloudcover',\n", " 'visibility',\n", " 'temp',\n", " 'pressure',\n", " 'humidity',\n", " 'solarradiation'\n", " ],\n", "\n", " # Astronomical and Temporal Features\n", " 'astronomical_features': [\n", " 'solar_elevation',\n", " 'solar_angle',\n", " 'day_length',\n", " 'hour_sin',\n", " 'hour_cos',\n", " 'day_of_year_sin',\n", " 'day_of_year_cos',\n", " 'month_sin',\n", " 'month_cos',\n", " 'solar_noon',\n", " 'daylight_correction'\n", " ],\n", "\n", " # Key Indices and Interactions\n", " 'key_interactions': [\n", " 'clear_sky_index',\n", " 'atmospheric_attenuation',\n", " 'theoretical_radiation',\n", " 'expected_radiation',\n", " 'cloud_elevation',\n", " 
'visibility_elevation',\n", " 'uv_cloud_interaction',\n", " 'temp_radiation_potential',\n", " 'air_mass_index',\n", " 'atmospheric_stability',\n", " 'vapor_pressure_deficit',\n", " 'diffusion_index',\n", " 'atmospheric_transmittance',\n", " 'temp_humidity_interaction',\n", " 'clear_sky_factor'\n", " ],\n", "\n", " # Rolling Features (temporal trends)\n", " 'rolling_features': [\n", " 'cloud_rolling_12h',\n", " 'temp_rolling_12h',\n", " 'uv_rolling_12h',\n", " 'cloudcover_rolling_mean_6h',\n", " 'temp_rolling_mean_6h',\n", " 'energy_rolling_mean_6h',\n", " 'uv_rolling_mean_6h',\n", " 'energy_volatility',\n", " 'uv_volatility'\n", " ],\n", "\n", " # Lag Features\n", " 'lag_features': [\n", " 'temp_1h_lag',\n", " 'cloudcover_1h_lag',\n", " 'humidity_1h_lag',\n", " 'energy_lag_1h',\n", " 'uv_lag_1h'\n", " ],\n", "\n", " # Efficiency and Performance Features\n", " 'efficiency_features': [\n", " 'temp_losses',\n", " 'soiling_loss_factor',\n", " 'estimated_efficiency',\n", " 'production_potential',\n", " 'system_performance_ratio',\n", " 'conversion_efficiency_ratio'\n", " ],\n", "\n", " # Weather Pattern Features\n", " 'weather_pattern_features': [\n", " 'clear_sky_duration',\n", " 'weather_variability_index',\n", " 'temp_stability',\n", " 'humidity_stability',\n", " 'cloudcover_stability'\n", " ],\n", "\n", " # Categorical Features\n", " 'categorical_features': [\n", " 'season_Spring',\n", " 'season_Summer',\n", " 'season_Autumn',\n", " 'season_Winter',\n", " 'time_period_Morning',\n", " 'time_period_Afternoon',\n", " 'time_period_Evening',\n", " 'time_period_Night'\n", " ]\n", " }\n", "\n", " final_features = [feature for group in features.values() for feature in group]\n", "\n", " if not isinstance(df.index, pd.DatetimeIndex):\n", " if 'datetime' in df.columns:\n", " df['datetime'] = pd.to_datetime(df['datetime'])\n", " df.set_index('datetime', inplace=True)\n", " else:\n", " raise ValueError(\"No datetime column or index found in DataFrame\")\n", "\n", " # Ordiniamo 
def create_sequence_data(X, sequence_length=24):
    """
    Convert row-wise data into overlapping fixed-length sequences for LSTM input.

    Sequence ``i`` holds rows ``i .. i + sequence_length - 1`` of ``X``, so
    consecutive sequences are shifted by one row.

    Parameters
    ----------
    X : array-like
        Input of shape ``(n_rows, ...)``; the first axis is the one that is
        windowed.
    sequence_length : int, default 24
        Number of consecutive rows in each sequence (how many previous hours
        to consider).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_rows - sequence_length + 1, sequence_length, ...)``.
        When ``X`` has fewer than ``sequence_length`` rows an empty array of
        shape ``(0, sequence_length, ...)`` is returned, so the result keeps
        the rank downstream code expects.

    Raises
    ------
    ValueError
        If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be a positive integer")

    X = np.asarray(X)
    n_sequences = len(X) - sequence_length + 1

    # np.array([]) would collapse to shape (0,) and lose the time and feature
    # axes, so build the empty result with an explicit shape instead.
    if n_sequences <= 0:
        return np.empty((0, sequence_length) + X.shape[1:], dtype=X.dtype)

    return np.array([X[start:start + sequence_length] for start in range(n_sequences)])
" 'performance': [45, 46, 47, 48, 49, 50]\n", " }\n", " \n", " # Feature extraction\n", " feature_tensors = {}\n", " for name, indices in feature_dims.items():\n", " valid_indices = [i for i in indices if i < input_shape[-1]]\n", " if valid_indices:\n", " feature_tensors[name] = Lambda(\n", " lambda x, idx=valid_indices: tf.gather(x, idx, axis=-1)\n", " )(inputs)\n", " \n", " # Feature processing with residual connections\n", " def process_feature_group(tensor, units, name):\n", " x = Conv1D(units, kernel_size=3, padding='same', activation='swish',\n", " kernel_regularizer=l2(l2_lambda))(tensor)\n", " x = BatchNormalization()(x)\n", " x = Dropout(0.2)(x)\n", " \n", " residual = Conv1D(units, kernel_size=1, padding='same')(tensor)\n", " x = Add()([x, residual])\n", " x = LayerNormalization()(x)\n", " \n", " return x\n", " \n", " # Process each feature group\n", " processed_features = {}\n", " for name, tensor in feature_tensors.items():\n", " units = 64 if name == 'solar' else 32 if name == 'weather' else 16\n", " processed_features[name] = process_feature_group(tensor, units, name)\n", " \n", " # Enhanced attention mechanism\n", " def attention_block(x, num_heads=4):\n", " attention_output = MultiHeadAttention(\n", " num_heads=num_heads, \n", " key_dim=x.shape[-1] // num_heads\n", " )(x, x)\n", " x = LayerNormalization()(x + attention_output)\n", " \n", " ffn = Dense(x.shape[-1] * 2, activation='swish')(x)\n", " ffn = Dropout(0.1)(ffn)\n", " ffn = Dense(x.shape[-1])(ffn)\n", " \n", " return LayerNormalization()(x + ffn)\n", " \n", " # Merge primary features with attention\n", " primary_features = [\n", " processed_features['solar'],\n", " processed_features['weather'],\n", " processed_features['performance']\n", " ]\n", " primary_context = Concatenate(axis=-1)(primary_features)\n", " primary_context = attention_block(primary_context)\n", " \n", " # Merge secondary features\n", " secondary_features = [\n", " processed_features[name] for name in ['temporal', 
'rolling', 'lag']\n", " if name in processed_features\n", " ]\n", " if secondary_features:\n", " secondary_context = Concatenate(axis=-1)(secondary_features)\n", " secondary_context = attention_block(secondary_context)\n", " else:\n", " secondary_context = primary_context\n", " \n", " # Final feature merge\n", " combined = Concatenate(axis=-1)([\n", " primary_context, \n", " secondary_context,\n", " processed_features['derived']\n", " ])\n", " \n", " # Sequential processing with residual LSTM\n", " def residual_lstm_block(x, units):\n", " lstm_out = Bidirectional(LSTM(units, return_sequences=True))(x)\n", " residual = Conv1D(units * 2, kernel_size=1, padding='same')(x)\n", " x = Add()([lstm_out, residual])\n", " x = LayerNormalization()(x)\n", " return x\n", " \n", " x = residual_lstm_block(combined, 128)\n", " x = residual_lstm_block(x, 64)\n", " x = Bidirectional(LSTM(64))(x)\n", " x = Dropout(0.2)(x)\n", " \n", " # Classification branch\n", " class_x = Dense(128, activation='swish', kernel_regularizer=l2(l2_lambda))(x)\n", " class_x = BatchNormalization()(class_x)\n", " class_x = Dropout(0.2)(class_x)\n", " class_x = Dense(64, activation='swish', kernel_regularizer=l2(l2_lambda))(class_x)\n", " class_output = Dense(1, activation='sigmoid', name='classification_output')(class_x)\n", " \n", " # Enhanced regression branch with multiple pathways\n", " def create_regression_pathway(x, name):\n", " x = Dense(128, activation='swish', kernel_regularizer=l2(l2_lambda))(x)\n", " x = BatchNormalization()(x)\n", " x = Dropout(0.2)(x)\n", " \n", " residual = x\n", " x = Dense(128, activation='swish', kernel_regularizer=l2(l2_lambda))(x)\n", " x = BatchNormalization()(x)\n", " x = Dense(128, activation='swish', kernel_regularizer=l2(l2_lambda))(x)\n", " x = Add()([x, residual])\n", " \n", " x = Dense(64, activation='swish', kernel_regularizer=l2(l2_lambda))(x)\n", " return Dense(1, name=f'{name}_output')(x)\n", " \n", " # Create specialized regression pathways\n", " low_range 
= create_regression_pathway(x, 'low_range')\n", " mid_range = create_regression_pathway(x, 'mid_range')\n", " high_range = create_regression_pathway(x, 'high_range')\n", " \n", " # Create feature representation for attention\n", " feature_vector = Dense(32, activation='swish')(x)\n", " \n", " # Stack the range predictions\n", " range_stack = tf.stack([low_range, mid_range, high_range], axis=1)\n", " \n", " # Create attention mechanism\n", " attention_context = Dense(32, activation='swish')(feature_vector)\n", " \n", " # Calculate attention weights using the context\n", " attention_weights = Dense(3, activation='softmax')(attention_context)\n", " \n", " # Apply attention weights to combine predictions\n", " reg_output = Lambda(\n", " lambda inputs: tf.reduce_sum(inputs[0] * inputs[1], axis=1),\n", " name='regression_output'\n", " )([attention_weights, range_stack])\n", " \n", " # Final output with enhanced processing\n", " final_x = Dense(256, activation='swish', kernel_regularizer=l2(l2_lambda))(x)\n", " final_x = BatchNormalization()(final_x)\n", " final_x = Dropout(0.2)(final_x)\n", " \n", " residual = final_x\n", " final_x = Dense(256, activation='swish', kernel_regularizer=l2(l2_lambda))(final_x)\n", " final_x = BatchNormalization()(final_x)\n", " final_x = Dense(256, activation='swish', kernel_regularizer=l2(l2_lambda))(final_x)\n", " final_x = Add()([final_x, residual])\n", " \n", " final_x = Dense(128, activation='swish', kernel_regularizer=l2(l2_lambda))(final_x)\n", " final_x = Dense(1)(final_x)\n", " final_output = Lambda(\n", " lambda x: tf.clip_by_value(x, min_output, max_output),\n", " name='final_output'\n", " )(final_x)\n", " \n", " # Build model\n", " model = Model(inputs=inputs, outputs=[class_output, reg_output, final_output])\n", " \n", " # Enhanced loss functions\n", " def enhanced_regression_loss(y_true, y_pred):\n", " mae = tf.abs(y_true - y_pred)\n", " mse = tf.square(y_true - y_pred)\n", " \n", " value_ranges = tf.cast(y_true > 2.0, 
tf.float32) * 1.5 + \\\n", " tf.cast(tf.logical_and(y_true <= 2.0, y_true > 1.0), tf.float32) * 1.2 + \\\n", " tf.cast(y_true <= 1.0, tf.float32)\n", " \n", " weighted_loss = (0.5 * mae + 0.5 * mse) * value_ranges\n", " return tf.reduce_mean(weighted_loss)\n", " \n", " def final_loss(y_true, y_pred):\n", " y_true = tf.clip_by_value(y_true, min_output, max_output)\n", " mae = tf.reduce_mean(tf.abs(y_true - y_pred))\n", " mse = tf.reduce_mean(tf.square(y_true - y_pred))\n", " return 0.5 * mae + 0.5 * mse\n", " \n", " # Learning rate schedule\n", " clr = CosineDecayRestarts(\n", " initial_learning_rate=2e-4,\n", " first_decay_steps=1000,\n", " t_mul=2.0,\n", " m_mul=0.9,\n", " alpha=1e-7\n", " )\n", " \n", " # Optimizer\n", " optimizer = AdamW(\n", " learning_rate=clr,\n", " weight_decay=0.01,\n", " clipnorm=1.0\n", " )\n", " \n", " # Compile model\n", " model.compile(\n", " optimizer=optimizer,\n", " loss={\n", " 'classification_output': 'binary_crossentropy',\n", " 'regression_output': enhanced_regression_loss,\n", " 'final_output': final_loss\n", " },\n", " loss_weights={\n", " 'classification_output': 0.2,\n", " 'regression_output': 0.4,\n", " 'final_output': 0.4\n", " }\n", " )\n", "\n", " # Plot model architecture\n", " try:\n", " plot_model(\n", " model,\n", " to_file=f'{folder_name}_model_architecture.png',\n", " show_shapes=True,\n", " show_layer_names=True,\n", " dpi=150,\n", " show_layer_activations=True\n", " )\n", " except Exception as e:\n", " print(f\"Warning: Could not plot model architecture: {e}\")\n", "\n", " return model\n", "\n", "\n", "def evaluate_solarenergy_predictions(y_true, y_pred, hour=None, folder_name=None):\n", " \"\"\"\n", " Comprehensive evaluation of solar energy predictions with detailed analysis and visualizations.\n", "\n", " Parameters:\n", " -----------\n", " y_true : array-like\n", " Actual solar energy values (kWh)\n", " y_pred : array-like\n", " Predicted solar energy values (kWh)\n", " hour : array-like, optional\n", " Array 
of hours corresponding to predictions, for temporal analysis\n", " folder_name : str, optional\n", " Directory to save analysis plots\n", "\n", " Returns:\n", " --------\n", " dict\n", " Dictionary containing all calculated metrics\n", " \"\"\"\n", "\n", " # Data preparation\n", " y_true = np.array(y_true).ravel()\n", " y_pred = np.array(y_pred).ravel()\n", " errors = y_pred - y_true\n", "\n", " # Basic metrics calculation\n", " mae_raw = mean_absolute_error(y_true, y_pred)\n", " rmse_raw = np.sqrt(mean_squared_error(y_true, y_pred))\n", " r2_raw = r2_score(y_true, y_pred)\n", "\n", " # Corrected MAPE calculation\n", " mask = y_true > 10 # Consider only values above 10 kWh\n", " if np.any(mask):\n", " mape = np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100\n", " else:\n", " mape = np.nan\n", "\n", " # Corrected error margin accuracy\n", " within_5_percent = np.mean(np.abs(errors) <= 5) * 100 # Within 5 kWh\n", " within_10_percent = np.mean(np.abs(errors) <= 10) * 100 # Within 10 kWh\n", " within_20_percent = np.mean(np.abs(errors) <= 20) * 100 # Within 20 kWh\n", "\n", " # Energy level classification\n", " def get_energy_level(value):\n", " if value <= 0.5:\n", " return 'Very Low'\n", " elif value <= 2.0:\n", " return 'Low'\n", " elif value <= 4.0:\n", " return 'Moderate'\n", " elif value <= 6.0:\n", " return 'High'\n", " elif value <= 8.0:\n", " return 'Very High'\n", " else:\n", " return 'Extreme'\n", "\n", " # Calculate energy levels\n", " y_true_levels = [get_energy_level(v) for v in y_true]\n", " y_pred_levels = [get_energy_level(v) for v in y_pred]\n", " level_accuracy = np.mean([t == p for t, p in zip(y_true_levels, y_pred_levels)])\n", "\n", " unique_levels = sorted(list(set(y_true_levels + y_pred_levels)))\n", "\n", " # Print main metrics\n", " print(\"\\nSolar Energy Prediction Metrics:\")\n", " print(\"\\nAbsolute Metrics:\")\n", " print(f\"MAE: {mae_raw:.2f} kWh\")\n", " print(f\"RMSE: {rmse_raw:.2f} kWh\")\n", " print(f\"R² Score: 
{r2_raw:.3f}\")\n", " print(f\"MAPE: {mape:.2f}%\" if not np.isnan(mape) else \"MAPE: N/A (insufficient data)\")\n", "\n", " print(\"\\nAccuracy Metrics:\")\n", " print(f\"Within ±5 kWh: {within_5_percent:.1f}%\")\n", " print(f\"Within ±10 kWh: {within_10_percent:.1f}%\")\n", " print(f\"Within ±20 kWh: {within_20_percent:.1f}%\")\n", "\n", " print(\"\\nLevel Accuracy:\")\n", " print(f\"Level Accuracy: {level_accuracy * 100:.1f}%\")\n", "\n", " # Confusion matrix for energy levels\n", " cm = confusion_matrix(y_true_levels, y_pred_levels, labels=unique_levels)\n", " print(\"\\nConfusion Matrix for Energy Levels:\")\n", " cm_df = pd.DataFrame(\n", " cm,\n", " columns=unique_levels,\n", " index=unique_levels\n", " )\n", " print(cm_df)\n", "\n", " # Time period analysis\n", " if hour is not None:\n", " day_periods = {\n", " 'Morning (5-11)': (5, 11),\n", " 'Noon (11-13)': (11, 13),\n", " 'Afternoon (13-17)': (13, 17),\n", " 'Evening (17-21)': (17, 21),\n", " 'Night (21-5)': (21, 5)\n", " }\n", "\n", " print(\"\\nAnalysis by Time Period:\")\n", " for period, (start, end) in day_periods.items():\n", " if start < end:\n", " mask = (hour >= start) & (hour < end)\n", " else:\n", " mask = (hour >= start) | (hour < end)\n", "\n", " if np.any(mask):\n", " period_mae = mean_absolute_error(y_true[mask], y_pred[mask])\n", "\n", " # Corrected period MAPE calculation\n", " period_mask = mask & (y_true > 10)\n", " if np.any(period_mask):\n", " period_mape = np.mean(np.abs((y_true[period_mask] - y_pred[period_mask]) / y_true[period_mask])) * 100\n", " print(f\"\\n{period}:\")\n", " print(f\"MAE: {period_mae:.2f} kWh\")\n", " print(f\"MAPE: {period_mape:.2f}%\")\n", " else:\n", " print(f\"\\n{period}:\")\n", " print(f\"MAE: {period_mae:.2f} kWh\")\n", " print(\"MAPE: N/A (insufficient data)\")\n", "\n", " # Visualizations\n", " if folder_name is not None:\n", " try:\n", " # Figure 1: Main analysis plots\n", " plt.figure(figsize=(20, 15))\n", "\n", " # Plot 1: Scatter plot of actual vs 
predicted values\n", " plt.subplot(3, 2, 1)\n", " plt.scatter(y_true, y_pred, alpha=0.5)\n", " plt.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'r--', lw=2)\n", " plt.xlabel('Actual Energy (kWh)')\n", " plt.ylabel('Predicted Energy (kWh)')\n", " plt.title('Actual vs Predicted Values')\n", " plt.grid(True)\n", "\n", " # Plot 2: Absolute error distribution\n", " plt.subplot(3, 2, 2)\n", " plt.hist(errors, bins=50, alpha=0.7)\n", " plt.xlabel('Prediction Error (kWh)')\n", " plt.ylabel('Frequency')\n", " plt.title('Error Distribution')\n", " plt.grid(True)\n", "\n", " # Plot 3: Percentage error distribution (only for values > 0.5 kWh)\n", " plt.subplot(3, 2, 3)\n", " mask = y_true > 0.5\n", " if np.any(mask):\n", " percentage_errors = ((y_pred[mask] - y_true[mask]) / y_true[mask]) * 100\n", " plt.hist(np.clip(percentage_errors, -100, 100), bins=50, alpha=0.7)\n", " plt.xlabel('Percentage Error (%)')\n", " plt.ylabel('Frequency')\n", " plt.title('Percentage Error Distribution (for values > 0.5 kWh)')\n", " plt.grid(True)\n", "\n", " # Plot 4: Errors vs actual values\n", " plt.subplot(3, 2, 4)\n", " plt.scatter(y_true, errors, alpha=0.5)\n", " plt.axhline(y=0, color='r', linestyle='--')\n", " plt.xlabel('Actual Energy (kWh)')\n", " plt.ylabel('Error (kWh)')\n", " plt.title('Errors vs Actual Values')\n", " plt.grid(True)\n", "\n", " # Plot 5: Error boxplot by Energy level\n", " plt.subplot(3, 2, 5)\n", " sns.boxplot(x=[get_energy_level(v) for v in y_true], y=errors)\n", " plt.xticks(rotation=45)\n", " plt.xlabel('Energy Level')\n", " plt.ylabel('Error (kWh)')\n", " plt.title('Error Distribution by Level')\n", "\n", " # Plot 6: Confusion matrix heatmap\n", " plt.subplot(3, 2, 6)\n", " sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')\n", " plt.title('Confusion Matrix')\n", " plt.xticks(rotation=45)\n", " plt.yticks(rotation=45)\n", "\n", " plt.tight_layout()\n", " filename = f'{folder_name}_energy_analysis.png'\n", " plt.savefig(filename, dpi=300, 
def plot_training_history(history, folder_name=None):
    """
    Visualize and optionally save the training history of the hybrid model.

    Draws a 2x2 grid: per-output losses, classification accuracy/AUC,
    regression-output MAE and final-output MAE. A series is drawn only if its
    key is present in ``history.history``; metrics that were not tracked
    during training are skipped instead of raising ``KeyError``.

    Parameters
    ----------
    history : object with a ``history`` dict attribute
        Maps metric names to per-epoch value lists (as returned by Keras
        ``model.fit``).
    folder_name : str, optional
        Prefix for the output files. When given, the figure is written to
        ``{folder_name}_training_history.png`` and the raw history to
        ``{folder_name}_training_history.json``.
    """
    records = history.history

    # (title, y-axis label, [(history key, legend label), ...]) for each panel.
    panels = [
        ('Model Losses', 'Loss', [
            ('classification_output_loss', 'Class Loss'),
            ('regression_output_loss', 'Reg Loss'),
            ('final_output_loss', 'Final Loss'),
            ('val_classification_output_loss', 'Val Class Loss'),
            ('val_regression_output_loss', 'Val Reg Loss'),
            ('val_final_output_loss', 'Val Final Loss'),
        ]),
        ('Classification Metrics', 'Metric Value', [
            ('classification_output_accuracy', 'Class Acc'),
            ('val_classification_output_accuracy', 'Val Class Acc'),
            ('classification_output_auc', 'Class AUC'),
            ('val_classification_output_auc', 'Val Class AUC'),
        ]),
        ('Regression MAE', 'MAE', [
            ('regression_output_mae', 'Reg MAE'),
            ('val_regression_output_mae', 'Val Reg MAE'),
        ]),
        ('Final Output MAE', 'MAE', [
            ('final_output_mae', 'Final MAE'),
            ('val_final_output_mae', 'Val Final MAE'),
        ]),
    ]

    plt.figure(figsize=(15, 10))

    for position, (title, y_label, series) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)

        drawn_any = False
        for key, label in series:
            if key in records:
                plt.plot(records[key], label=label)
                drawn_any = True

        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        # A legend without any plotted series only produces a warning.
        if drawn_any:
            plt.legend()
        plt.grid(True)

    plt.tight_layout()

    if folder_name is not None:
        filename = f'{folder_name}_training_history.png'
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        print(f"\nTraining history plot saved as: {filename}")

        # Save history to JSON. ``default=float`` converts NumPy scalar values
        # (e.g. a logged learning rate) that ``json`` cannot encode natively.
        json_filename = f'{folder_name}_training_history.json'
        with open(json_filename, 'w') as f:
            json.dump(records, f, default=float)
        print(f"Training history saved as: {json_filename}")

    plt.show()
for the solar energy prediction model.\n", " \n", " Parameters:\n", " -----------\n", " y_true : array-like\n", " Ground truth values\n", " y_class : array-like\n", " Classification predictions (probability of non-zero values)\n", " y_reg : array-like\n", " Regression predictions (unrestricted values)\n", " y_final : array-like\n", " Final clipped predictions\n", " min_output : float\n", " Minimum allowed output value\n", " max_output : float\n", " Maximum allowed output value\n", " \n", " Returns:\n", " --------\n", " dict\n", " Dictionary containing all calculated metrics\n", " \"\"\"\n", " from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix\n", " \n", " # Ensure proper array formatting and dimensionality\n", " y_true = np.array(y_true).flatten()\n", " y_class = np.array(y_class).flatten()\n", " y_reg = np.array(y_reg).flatten()\n", " y_final = np.array(y_final).flatten()\n", " \n", " # Validate input dimensions\n", " assert len(y_true) == len(y_class) == len(y_reg) == len(y_final), \\\n", " \"All input arrays must have the same length\"\n", " \n", " # Classification metrics with error handling\n", " print(\"\\nClassification Metrics:\")\n", " try:\n", " y_true_binary = (y_true > 0).astype(int)\n", " y_pred_binary = (y_class > 0.5).astype(int)\n", " \n", " accuracy = np.mean((y_class > 0.5) == (y_true > 0)) * 100\n", " auc_roc = roc_auc_score(y_true > 0, y_class)\n", " print(f\"Accuracy: {accuracy:.2f}%\")\n", " print(f\"AUC-ROC: {auc_roc:.4f}\")\n", " \n", " print(\"\\nConfusion Matrix:\")\n", " conf_matrix = confusion_matrix(y_true_binary, y_pred_binary)\n", " print(conf_matrix)\n", " \n", " print(\"\\nClassification Report:\")\n", " class_report = classification_report(\n", " y_true_binary, \n", " y_pred_binary,\n", " target_names=['Zero', 'Non-Zero'],\n", " digits=4\n", " )\n", " print(class_report)\n", " except Exception as e:\n", " print(f\"Error in classification metrics calculation: {str(e)}\")\n", " \n", " # Regression 
metrics with error handling\n", " print(\"\\nRegression Metrics (non-zero values):\")\n", " mask_nonzero = y_true > 0\n", " if np.any(mask_nonzero):\n", " try:\n", " y_true_nonzero = y_true[mask_nonzero]\n", " y_reg_nonzero = y_reg[mask_nonzero]\n", " \n", " # Range validation\n", " out_of_range = np.sum(\n", " (y_reg_nonzero < min_output) | \n", " (y_reg_nonzero > max_output)\n", " )\n", " \n", " # Error metrics with numerical stability\n", " epsilon = 1e-7\n", " diff = np.abs((y_true_nonzero - y_reg_nonzero) / \n", " (y_true_nonzero + epsilon))\n", " diff = np.clip(diff, 0, 1)\n", " \n", " # Calculate metrics\n", " mape = np.mean(diff) * 100\n", " within_10_percent = np.mean(diff <= 0.10) * 100\n", " mae = np.mean(np.abs(y_true_nonzero - y_reg_nonzero))\n", " rmse = np.sqrt(np.mean(np.square(y_true_nonzero - y_reg_nonzero)))\n", " \n", " print(f\"Out of range: {out_of_range} predictions\")\n", " print(f\"MAPE: {mape:.2f}%\")\n", " print(f\"Within ±10%: {within_10_percent:.2f}%\")\n", " print(f\"MAE: {mae:.2f}\")\n", " print(f\"RMSE: {rmse:.2f}\")\n", " except Exception as e:\n", " print(f\"Error in regression metrics calculation: {str(e)}\")\n", " else:\n", " print(\"No non-zero values in this batch\")\n", " \n", " # Final output metrics with error handling\n", " print(\"\\nFinal Combined Output Metrics:\")\n", " try:\n", " # Ensure outputs are within bounds\n", " out_of_range = np.sum((y_final < min_output) | (y_final > max_output))\n", " \n", " # Calculate metrics with numerical stability\n", " epsilon = 1e-7\n", " diff = np.abs((y_true - y_final) / (y_true + epsilon))\n", " diff = np.clip(diff, 0, 1)\n", " \n", " mape = np.mean(diff) * 100\n", " within_2_percent = np.mean(diff <= 0.02) * 100\n", " within_5_percent = np.mean(diff <= 0.05) * 100\n", " within_10_percent = np.mean(diff <= 0.10) * 100\n", " within_20_percent = np.mean(diff <= 0.20) * 100\n", " mae = np.mean(np.abs(y_true - y_final))\n", " rmse = np.sqrt(np.mean(np.square(y_true - y_final)))\n", " 
def train_hybrid_model(model, X_train, y_train, X_test, y_test, epochs=100, batch_size=32, folder_name='solarenergy', min_output=0, max_output=1):
    """
    Fit the three-output hybrid model and report detailed metrics while training.

    The targets are passed to ``model.fit`` as dicts keyed by
    'classification_output', 'regression_output' and 'final_output'. The
    classification target is the indicator ``y > 0``; the other two outputs
    are trained against ``y`` itself. The test split is used as validation data.

    Parameters:
    -----------
    model : Keras model with the three named outputs listed above
    X_train, y_train : training inputs and targets
    X_test, y_test : validation inputs and targets
    epochs, batch_size : forwarded to ``model.fit``
    folder_name : str
        Prefix for the checkpoint file and the TensorBoard log directory
    min_output, max_output : float
        Bounds forwarded to ``calculate_metrics`` for the out-of-range counts

    Returns:
    --------
    The history object returned by ``model.fit``.

    Raises:
    -------
    Re-raises any exception raised during training after printing the model
    output names and the target keys. The Keras session is cleared in every case.
    """
    def build_targets(y):
        # Keys must use the exact names of the model outputs.
        return {
            'classification_output': (y > 0).astype(float),
            'regression_output': y,
            'final_output': y
        }

    train_targets = build_targets(y_train)
    test_targets = build_targets(y_test)

    def report_detailed_metrics(epoch, logs):
        # Full metric report on the test split every tenth epoch (0, 10, 20, ...).
        if epoch % 10 != 0:
            return
        print(f"\nEpoch {epoch + 1} Detailed Metrics:")
        epoch_predictions = model.predict(X_test, verbose=0)
        calculate_metrics(y_test, *epoch_predictions, min_output, max_output)

    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_final_output_loss',
        patience=35,
        restore_best_weights=True,
        mode='min',
        verbose=1,
        min_delta=1e-5
    )
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=f'{folder_name}_best_model.h5',
        monitor='val_final_output_loss',
        save_best_only=True,
        mode='min',
        save_weights_only=True  # set to True to avoid serialization problems
    )
    tensorboard = tf.keras.callbacks.TensorBoard(
        log_dir=f'./{folder_name}_logs',
        histogram_freq=1,
        write_graph=True,
        update_freq='epoch'
    )
    # Disabled: ReduceLROnPlateau(monitor='val_final_output_loss', factor=0.8,
    # patience=10, verbose=1, mode='min', min_delta=1e-4, cooldown=2, min_lr=1e-7)
    callbacks = [
        early_stopping,
        checkpoint,
        tensorboard,
        tf.keras.callbacks.LambdaCallback(on_epoch_end=report_detailed_metrics),
        tf.keras.callbacks.TerminateOnNaN()
    ]

    try:
        history = model.fit(
            X_train,
            train_targets,
            validation_data=(X_test, test_targets),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=1,
            shuffle=False
        )

        print("\nTraining completed successfully!")

        # Final evaluation on the test split
        final_predictions = model.predict(X_test, verbose=0)
        calculate_metrics(y_test, *final_predictions, min_output, max_output)

        return history

    except Exception as e:
        # Print what is needed to diagnose an output-name / target-key mismatch.
        print(f"\nError during training: {str(e)}")
        print("\nModel output names:", [output.name for output in model.outputs])
        print("Training targets keys:", train_targets.keys())
        raise

    finally:
        tf.keras.backend.clear_session()
def integrate_predictions(df, predictions, sequence_length=24):
    """
    Integrates solar energy predictions into the original dataset for pre-2010 data.

    Predictions are aligned with the pre-2010 rows starting at position
    ``sequence_length - 1`` and are used only to fill missing ``solarenergy``
    values. The input DataFrame is not modified; a new DataFrame is returned.

    Parameters:
    -----------
    df : pandas.DataFrame
        Original dataset; must contain 'datetime' and 'solarenergy' columns
    predictions : tuple
        Tuple containing (classification_pred, regression_pred, final_pred)
        - classification_pred: probability of non-zero values
        - regression_pred: predicted values (used for non-zero cases)
        - final_pred: final combined predictions
        Each element must expose ``flatten()`` (e.g. a NumPy array).
    sequence_length : int
        Sequence length used for predictions

    Returns:
    --------
    pandas.DataFrame
        Dataset with missing solar energy values filled from the predictions.
        The intermediate prediction columns are dropped before returning.

    Raises:
    -------
    ValueError
        If the number of predictions does not match the number of pre-2010
        rows available after skipping the first ``sequence_length - 1``.
    """
    # assign() returns a new frame, so the caller's DataFrame is left untouched.
    df = df.assign(datetime=pd.to_datetime(df['datetime']))

    # Identify pre-2010 rows
    mask_pre_2010 = df['datetime'].dt.year < 2010

    # Unpack predictions
    classification_pred, regression_pred, final_pred = predictions
    final_flat = final_pred.flatten()
    classification_flat = classification_pred.flatten()
    regression_flat = regression_pred.flatten()

    # Timestamps the predictions refer to (index reset so alignment is positional).
    dates_pre_2010 = (
        df.loc[mask_pre_2010, 'datetime']
        .iloc[sequence_length - 1:]
        .reset_index(drop=True)
    )

    expected = len(dates_pre_2010)
    if not (len(final_flat) == len(classification_flat) == len(regression_flat) == expected):
        raise ValueError(
            f"Prediction length mismatch: expected {expected} values for the pre-2010 rows, "
            f"got final={len(final_flat)}, classification={len(classification_flat)}, "
            f"regression={len(regression_flat)}"
        )

    # Create temporary DataFrame with all predictions
    predictions_df = pd.DataFrame({
        'datetime': dates_pre_2010,
        'solarenergy_predicted': final_flat,
        'solarenergy_classification': classification_flat,
        'solarenergy_regression': regression_flat
    })

    # Merge with original dataset
    df = df.merge(predictions_df, on='datetime', how='left')

    # Remember which rows were missing BEFORE filling: comparing values after
    # the fill would also count rows whose original value happens to equal
    # the prediction (e.g. both 0).
    was_missing = df['solarenergy'].isna()

    # Update solar energy column where missing
    df['solarenergy'] = df['solarenergy'].fillna(df['solarenergy_predicted'])

    # Print detailed statistics
    print("\nPrediction Integration Statistics:")
    print(f"Added {len(final_pred)} predictions to dataset")
    print(f"Rows with solar energy after integration: {df['solarenergy'].notna().sum()}")

    # Analyze prediction components for the values that were actually filled
    mask_filled = was_missing & df['solarenergy_predicted'].notna()
    if mask_filled.any():
        filled_data = df[mask_filled]

        print("\nFilled Values Analysis:")
        print(f"Zero predictions (classification < 0.5): {(filled_data['solarenergy_classification'] < 0.5).sum()}")
        print(f"Non-zero predictions (classification >= 0.5): {(filled_data['solarenergy_classification'] >= 0.5).sum()}")

        # Distribution of predicted values
        non_zero_pred = filled_data[filled_data['solarenergy_predicted'] > 0]
        if len(non_zero_pred) > 0:
            print(f"\nNon-zero predictions statistics:")
            print(f"Mean: {non_zero_pred['solarenergy_predicted'].mean():.2f}")
            print(f"Median: {non_zero_pred['solarenergy_predicted'].median():.2f}")
            print(f"Std: {non_zero_pred['solarenergy_predicted'].std():.2f}")

    # The intermediate prediction columns are not part of the returned dataset
    columns_to_drop = ['solarenergy_predicted', 'solarenergy_classification',
                       'solarenergy_regression']
    df = df.drop(columns_to_drop, axis=1)

    return df
def analyze_distribution(data, solar_column='solarenergy', name = 'Solar Energy'):
    """
    Analyze the distribution of a numeric column in detail.

    Computes summary statistics, draws a 2x2 diagnostic figure (histogram,
    box plot, Q-Q plot, log1p histogram), and, when a 'timestamp' or
    'datetime' column exists, a time-series plot and a monthly-mean bar chart.
    It then prints the statistics and some normalization suggestions.
    The input DataFrame is not modified.

    Parameters:
    -----------
    data : pandas.DataFrame
        DataFrame containing the column to analyze
    solar_column : str, default='solarenergy'
        Name of the column to analyze
    name : str, default='Solar Energy'
        Human-readable label used in plot titles and printed output

    Returns:
    --------
    dict
        Dictionary with the main statistics (count, missing, zeros, mean,
        median, std, min, max, skewness, kurtosis and selected percentiles)
    """
    values = data[solar_column]
    non_null = values.dropna()

    # Figure hosting the four diagnostic subplots
    plt.figure(figsize=(20, 12))

    # 1. Basic statistics
    stats_dict = {
        'count': len(values),
        'missing': values.isnull().sum(),
        'zeros': (values == 0).sum(),
        'mean': values.mean(),
        'median': values.median(),
        'std': values.std(),
        'min': values.min(),
        'max': values.max(),
        'skewness': stats.skew(non_null),
        'kurtosis': stats.kurtosis(non_null)
    }

    # Percentiles
    percentiles = [1, 5, 10, 25, 50, 75, 90, 95, 99]
    for p in percentiles:
        stats_dict[f'percentile_{p}'] = np.percentile(non_null, p)

    # 2. Visualizations

    # 2.1 Distribution
    plt.subplot(2, 2, 1)
    sns.histplot(data=data, x=solar_column, kde=True)
    plt.title(f'Distribuzione di {name}')
    plt.xlabel(f'{name}')
    plt.ylabel('Frequenza')

    # 2.2 Box plot
    plt.subplot(2, 2, 2)
    sns.boxplot(y=values)
    plt.title(f'Box Plot di {name}')

    # 2.3 Q-Q plot
    plt.subplot(2, 2, 3)
    stats.probplot(non_null, dist="norm", plot=plt)
    plt.title(f'Q-Q Plot di {name}')

    # 2.4 Log-transformed distribution
    plt.subplot(2, 2, 4)
    sns.histplot(data=np.log1p(values), kde=True)
    plt.title(f'Distribuzione Log-trasformata di {name}')
    plt.xlabel(f'Log({name} + 1)')
    plt.ylabel('Frequenza')

    plt.tight_layout()
    plt.show()

    # 3. Temporal analysis, if a time column is available
    time_col = next((col for col in ('timestamp', 'datetime') if col in data.columns), None)
    if time_col is not None:
        raw_time = data[time_col]

        # Numeric columns are treated as epoch seconds. The dtype check also
        # covers NumPy integers, which a plain isinstance(..., (int, float))
        # test on the first element does not recognise; the element test is
        # kept for object columns holding Python numbers.
        is_epoch = pd.api.types.is_numeric_dtype(raw_time) or (
            len(raw_time) > 0 and isinstance(raw_time.iloc[0], (int, float))
        )
        if is_epoch:
            timestamps = pd.to_datetime(raw_time, unit='s')
        else:
            timestamps = pd.to_datetime(raw_time)

        # Time-series plot
        plt.figure(figsize=(15, 6))
        plt.plot(timestamps, values)
        plt.title(f'Serie Temporale di {name}')
        plt.xlabel('Data')
        plt.ylabel(f'{name}')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

        # Seasonal analysis. Grouping by a local Series keeps helper columns
        # out of the caller's DataFrame (and cannot clobber an existing 'month').
        month = timestamps.dt.month.rename('month')
        seasonal_stats = values.groupby(month).agg(['mean', 'std', 'median'])

        plt.figure(figsize=(12, 6))
        seasonal_stats['mean'].plot(kind='bar')
        plt.title(f'Media Mensile di {name}')
        plt.xlabel('Mese')
        plt.ylabel(f'{name} Media')
        plt.tight_layout()
        plt.show()

    # 4. Print the main statistics
    print(f"\nStatistiche principali di {name}:")
    print("-" * 50)
    for key, value in stats_dict.items():
        print(f"{key:15}: {value:,.4f}")

    # 5. Normalization suggestions
    print("\nSuggerimenti per la normalizzazione:")
    print("-" * 50)

    skewness = abs(stats_dict['skewness'])
    if skewness > 1:
        print("- La distribuzione è fortemente asimmetrica (skewness > 1)")
        print("- Considerare una trasformazione logaritmica: np.log1p(x)")

    range_ratio = stats_dict['max'] / stats_dict['std']
    if range_ratio > 10:
        print("- La variabile ha una scala molto ampia")
        print("- Considerare RobustScaler o StandardScaler per la normalizzazione")

    zero_ratio = stats_dict['zeros'] / stats_dict['count']
    if zero_ratio > 0.1:
        print(f"- Alta presenza di zeri ({zero_ratio:.2%})")
        print("- Considerare un modello in due parti: classificazione degli zeri + regressione sui valori non-zero")

    return stats_dict
df = pd.read_parquet('../../sources/weather_data_solarradiation.parquet')

print("Initializing solar energy model training...")

# Data preparation
print("\n1. Preparing data...")
X_train_seq, X_test_seq, y_train, y_test, scaler_X, scaler_y, features, X_to_predict_seq = prepare_hybrid_data(df)

print(f"Training data shape: {X_train_seq.shape}")
print(f"Test data shape: {X_test_seq.shape}")

# Artifact paths for this run (all prefixed with folder_name)
scaler_X_path = f'{folder_name}_scale_X.joblib'
scaler_y_path = f'{folder_name}_scale_y.joblib'
features_path = f'{folder_name}_features.json'
model_path = f'{folder_name}_best_model.h5'
history_path = f'{folder_name}_training_history.json'

# Save or load scalers and features.
# NOTE(review): a loaded artifact replaces the object freshly returned by
# prepare_hybrid_data, so it must come from the same data preparation;
# confirm this is the intended behaviour when the files already exist.
if os.path.exists(scaler_X_path):
    print(f"Loading existing scaler X from: {scaler_X_path}")
    # Bind to scaler_X (not a throwaway name) so the loaded scaler is actually used.
    scaler_X = joblib.load(scaler_X_path)
else:
    print(f"Saving scaler X to: {scaler_X_path}")
    joblib.dump(scaler_X, scaler_X_path)

if os.path.exists(scaler_y_path):
    print(f"Loading existing scaler y from: {scaler_y_path}")
    scaler_y = joblib.load(scaler_y_path)
else:
    print(f"Saving scaler y to: {scaler_y_path}")
    joblib.dump(scaler_y, scaler_y_path)

if os.path.exists(features_path):
    print(f"Loading existing features from: {features_path}")
    with open(features_path, 'r') as f:
        features = json.load(f)
else:
    print(f"Saving features to: {features_path}")
    with open(features_path, 'w') as f:
        json.dump(features, f)

# Data quality verification: fail fast before any model is built
if np.isnan(X_train_seq).any() or np.isnan(y_train).any():
    raise ValueError("Found NaN values in training data")
Creating model...\n", "\\Min dataset solar energy : 0.0 - Scaled Version : 0.0\n", "\n", "Max dataset solar energy : 4.0 - Scaled Version : 3.3333333333333335\n", "Max dataset solar energy increased by 15% : 4.6 - Scaled Version : 3.833333333333333\n", "\n", "Class distribution in training set:\n", "Zeros: 56899 (50.41%)\n", "Non-zeros: 55983 (49.59%)\n", "\n", "Class distribution in test set:\n", "Zeros: 8576 (50.90%)\n", "Non-zeros: 8273 (49.10%)\n", "\n", "Model output names: ['classification_output', 'regression_output', 'final_output']\n", "\n", "4. Starting training...\n", "Epoch 1/150\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2024-11-27 14:02:24.816496: W tensorflow/core/framework/op_kernel.cc:1827] INVALID_ARGUMENT: required broadcastable shapes\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Error during training: Graph execution error:\n", "\n", "Detected at node model/regression_output/mul defined at (most recent call last):\n", " File \"\", line 198, in _run_module_as_main\n", "\n", " File \"\", line 88, in _run_code\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/ipykernel_launcher.py\", line 17, in \n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py\", line 1046, in launch_instance\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py\", line 736, in start\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py\", line 195, in start\n", "\n", " File \"/usr/lib/python3.11/asyncio/base_events.py\", line 604, in run_forever\n", "\n", " File \"/usr/lib/python3.11/asyncio/base_events.py\", line 1909, in _run_once\n", "\n", " File \"/usr/lib/python3.11/asyncio/events.py\", line 80, in _run\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py\", line 516, in dispatch_queue\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py\", line 505, in 
process_one\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py\", line 412, in dispatch_shell\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py\", line 740, in execute_request\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/ipykernel/ipkernel.py\", line 422, in do_execute\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/ipykernel/zmqshell.py\", line 546, in run_cell\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3024, in run_cell\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3079, in _run_cell\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/IPython/core/async_helpers.py\", line 129, in _pseudo_sync_runner\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3284, in run_cell_async\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3466, in run_ast_nodes\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3526, in run_code\n", "\n", " File \"/tmp/ipykernel_341907/1713792660.py\", line 47, in \n", "\n", " File \"/tmp/ipykernel_341907/594795021.py\", line 730, in train_hybrid_model\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 65, in error_handler\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1783, in fit\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1377, in train_function\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1360, in step_function\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1349, in run_step\n", "\n", " File 
\"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1126, in train_step\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 65, in error_handler\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 589, in __call__\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 65, in error_handler\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/base_layer.py\", line 1149, in __call__\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 96, in error_handler\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/functional.py\", line 515, in call\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/functional.py\", line 672, in _run_internal_graph\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 65, in error_handler\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/base_layer.py\", line 1149, in __call__\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 96, in error_handler\n", "\n", " File \"/usr/local/lib/python3.11/dist-packages/keras/src/layers/core/lambda_layer.py\", line 212, in call\n", "\n", " File \"/tmp/ipykernel_341907/594795021.py\", line 153, in \n", "\n", "required broadcastable shapes\n", "\t [[{{node model/regression_output/mul}}]] [Op:__inference_train_function_106117]\n", "\n", "Model output names: ['classification_output/Sigmoid:0', 'regression_output/Sum:0', 'final_output/clip_by_value:0']\n", "Training targets keys: dict_keys(['classification_output', 'regression_output', 'final_output'])\n" ] }, { "ename": "InvalidArgumentError", "evalue": "Graph execution error:\n\nDetected at node model/regression_output/mul defined at (most recent call 
last):\n File \"\", line 198, in _run_module_as_main\n\n File \"\", line 88, in _run_code\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel_launcher.py\", line 17, in \n\n File \"/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py\", line 1046, in launch_instance\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py\", line 736, in start\n\n File \"/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py\", line 195, in start\n\n File \"/usr/lib/python3.11/asyncio/base_events.py\", line 604, in run_forever\n\n File \"/usr/lib/python3.11/asyncio/base_events.py\", line 1909, in _run_once\n\n File \"/usr/lib/python3.11/asyncio/events.py\", line 80, in _run\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py\", line 516, in dispatch_queue\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py\", line 505, in process_one\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py\", line 412, in dispatch_shell\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py\", line 740, in execute_request\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/ipkernel.py\", line 422, in do_execute\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/zmqshell.py\", line 546, in run_cell\n\n File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3024, in run_cell\n\n File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3079, in _run_cell\n\n File \"/usr/local/lib/python3.11/dist-packages/IPython/core/async_helpers.py\", line 129, in _pseudo_sync_runner\n\n File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3284, in run_cell_async\n\n File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3466, in run_ast_nodes\n\n File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", 
line 3526, in run_code\n\n File \"/tmp/ipykernel_341907/1713792660.py\", line 47, in \n\n File \"/tmp/ipykernel_341907/594795021.py\", line 730, in train_hybrid_model\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 65, in error_handler\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1783, in fit\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1377, in train_function\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1360, in step_function\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1349, in run_step\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1126, in train_step\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 65, in error_handler\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 589, in __call__\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 65, in error_handler\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/base_layer.py\", line 1149, in __call__\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 96, in error_handler\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/functional.py\", line 515, in call\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/functional.py\", line 672, in _run_internal_graph\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 65, in error_handler\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/base_layer.py\", line 1149, in __call__\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 96, in error_handler\n\n File 
\"/usr/local/lib/python3.11/dist-packages/keras/src/layers/core/lambda_layer.py\", line 212, in call\n\n File \"/tmp/ipykernel_341907/594795021.py\", line 153, in \n\nrequired broadcastable shapes\n\t [[{{node model/regression_output/mul}}]] [Op:__inference_train_function_106117]", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mInvalidArgumentError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[11], line 47\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mModel output names:\u001b[39m\u001b[38;5;124m\"\u001b[39m, output_names)\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m4. Starting training...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 47\u001b[0m history \u001b[38;5;241m=\u001b[39m \u001b[43mtrain_hybrid_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 48\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 49\u001b[0m \u001b[43m \u001b[49m\u001b[43mX_train\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mX_train_seq\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 50\u001b[0m \u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43my_train\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 51\u001b[0m \u001b[43m \u001b[49m\u001b[43mX_test\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mX_test_seq\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 52\u001b[0m \u001b[43m \u001b[49m\u001b[43my_test\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43my_test\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 53\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mepochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m150\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 54\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m512\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 55\u001b[0m \u001b[43m \u001b[49m\u001b[43mfolder_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfolder_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 56\u001b[0m \u001b[43m \u001b[49m\u001b[43mmin_output\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmin_val_scaled\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 57\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_output\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_val_scaled\u001b[49m\n\u001b[1;32m 58\u001b[0m \u001b[43m)\u001b[49m\n", "Cell \u001b[0;32mIn[8], line 730\u001b[0m, in \u001b[0;36mtrain_hybrid_model\u001b[0;34m(model, X_train, y_train, X_test, y_test, epochs, batch_size, folder_name, min_output, max_output)\u001b[0m\n\u001b[1;32m 717\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m'''\u001b[39;00m\n\u001b[1;32m 718\u001b[0m \u001b[38;5;124;03mtf.keras.callbacks.ReduceLROnPlateau(\u001b[39;00m\n\u001b[1;32m 719\u001b[0m \u001b[38;5;124;03m monitor='val_final_output_loss',\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 727\u001b[0m \u001b[38;5;124;03m ),\u001b[39;00m\n\u001b[1;32m 728\u001b[0m \u001b[38;5;124;03m'''\u001b[39;00m\n\u001b[1;32m 729\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 730\u001b[0m history \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 731\u001b[0m \u001b[43m \u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 732\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrain_targets\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 733\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mvalidation_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mX_test\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_targets\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 734\u001b[0m \u001b[43m \u001b[49m\u001b[43mepochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mepochs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 735\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 736\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 737\u001b[0m \u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 738\u001b[0m \u001b[43m \u001b[49m\u001b[43mshuffle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 739\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 741\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mTraining completed successfully!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 743\u001b[0m \u001b[38;5;66;03m# Final evaluation\u001b[39;00m\n", "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py:70\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 67\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[1;32m 68\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;66;03m# `tf.debugging.disable_traceback_filtering()`\u001b[39;00m\n\u001b[0;32m---> 70\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n", "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/tensorflow/python/eager/execute.py:60\u001b[0m, in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[38;5;66;03m# Convert any objects of type core_types.Tensor to Tensor.\u001b[39;00m\n\u001b[1;32m 54\u001b[0m inputs \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 55\u001b[0m tensor_conversion_registry\u001b[38;5;241m.\u001b[39mconvert(t)\n\u001b[1;32m 56\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(t, core_types\u001b[38;5;241m.\u001b[39mTensor)\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m t\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m t \u001b[38;5;129;01min\u001b[39;00m inputs\n\u001b[1;32m 59\u001b[0m ]\n\u001b[0;32m---> 60\u001b[0m tensors \u001b[38;5;241m=\u001b[39m pywrap_tfe\u001b[38;5;241m.\u001b[39mTFE_Py_Execute(ctx\u001b[38;5;241m.\u001b[39m_handle, device_name, op_name,\n\u001b[1;32m 61\u001b[0m inputs, attrs, num_outputs)\n\u001b[1;32m 62\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m core\u001b[38;5;241m.\u001b[39m_NotOkStatusException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 63\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", "\u001b[0;31mInvalidArgumentError\u001b[0m: Graph execution error:\n\nDetected at node model/regression_output/mul defined at (most recent call last):\n File \"\", line 198, in _run_module_as_main\n\n File \"\", line 88, in _run_code\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel_launcher.py\", line 17, in \n\n File 
\"/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py\", line 1046, in launch_instance\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py\", line 736, in start\n\n File \"/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py\", line 195, in start\n\n File \"/usr/lib/python3.11/asyncio/base_events.py\", line 604, in run_forever\n\n File \"/usr/lib/python3.11/asyncio/base_events.py\", line 1909, in _run_once\n\n File \"/usr/lib/python3.11/asyncio/events.py\", line 80, in _run\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py\", line 516, in dispatch_queue\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py\", line 505, in process_one\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py\", line 412, in dispatch_shell\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py\", line 740, in execute_request\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/ipkernel.py\", line 422, in do_execute\n\n File \"/usr/local/lib/python3.11/dist-packages/ipykernel/zmqshell.py\", line 546, in run_cell\n\n File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3024, in run_cell\n\n File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3079, in _run_cell\n\n File \"/usr/local/lib/python3.11/dist-packages/IPython/core/async_helpers.py\", line 129, in _pseudo_sync_runner\n\n File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3284, in run_cell_async\n\n File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3466, in run_ast_nodes\n\n File \"/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py\", line 3526, in run_code\n\n File \"/tmp/ipykernel_341907/1713792660.py\", line 47, in \n\n File \"/tmp/ipykernel_341907/594795021.py\", line 730, in train_hybrid_model\n\n File 
\"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 65, in error_handler\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1783, in fit\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1377, in train_function\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1360, in step_function\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1349, in run_step\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 1126, in train_step\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 65, in error_handler\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/training.py\", line 589, in __call__\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 65, in error_handler\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/base_layer.py\", line 1149, in __call__\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 96, in error_handler\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/functional.py\", line 515, in call\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/functional.py\", line 672, in _run_internal_graph\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 65, in error_handler\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/engine/base_layer.py\", line 1149, in __call__\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py\", line 96, in error_handler\n\n File \"/usr/local/lib/python3.11/dist-packages/keras/src/layers/core/lambda_layer.py\", line 212, in call\n\n File \"/tmp/ipykernel_341907/594795021.py\", line 153, in \n\nrequired broadcastable shapes\n\t [[{{node 
# Model creation and training.
#
# Relies on names defined in earlier cells: X_train_seq, X_test_seq, y_train,
# y_test, df, scaler_y, folder_name, create_solarenergy_model and
# train_hybrid_model.
#
# NOTE(review): the saved output of this cell ends with
# "InvalidArgumentError: required broadcastable shapes" raised at node
# model/regression_output/mul (a Lambda layer). That layer is built inside
# create_solarenergy_model, so the root cause is outside this cell.
print("\n2. Creating model...")
input_shape = (X_train_seq.shape[1], X_train_seq.shape[2])

# Lower output bound: the scaled representation of 0, not of the dataset
# minimum. min_val is only reported for information.
min_val = df['solarenergy'].min()
min_val_scaled = scaler_y.transform([[0]])[0][0]

max_val = df['solarenergy'].max()
max_val_scaled = scaler_y.transform([[max_val]])[0][0]

# Fixed: the original f-string started with the invalid escape "\M" instead
# of a newline followed by "Min".
print(f"\nMin dataset solar energy : {min_val} - Scaled Version : {min_val_scaled}")

print(f"\nMax dataset solar energy : {max_val} - Scaled Version : {max_val_scaled}")

# Headroom above the observed maximum for the model's output range.
increase_percentage = 15

max_val = max_val * (1 + increase_percentage / 100)
# Re-transform the enlarged raw maximum instead of multiplying the scaled
# value: the two only agree when the scaler has no offset, and the message
# below reports this number as the scaled version of the new max_val.
max_val_scaled = scaler_y.transform([[max_val]])[0][0]

print(f"Max dataset solar energy increased by {increase_percentage}% : {max_val} - Scaled Version : {max_val_scaled}")

# Create the hybrid model
model = create_solarenergy_model(
    input_shape=input_shape,
    folder_name=folder_name,
    min_output=min_val_scaled,
    max_output=max_val_scaled
)

# Binary targets (zero vs non-zero), used here only to report class balance.
# NOTE(review): y_train / y_test are compared against 0 in scaled space; this
# matches "raw value > 0" only if scaler_y maps 0 to 0 - confirm.
y_train_binary = (y_train > 0).astype(float)
y_test_binary = (y_test > 0).astype(float)

print("\nClass distribution in training set:")
print(f"Zeros: {np.sum(y_train_binary == 0)} ({np.mean(y_train_binary == 0)*100:.2f}%)")
print(f"Non-zeros: {np.sum(y_train_binary == 1)} ({np.mean(y_train_binary == 1)*100:.2f}%)")

print("\nClass distribution in test set:")
print(f"Zeros: {np.sum(y_test_binary == 0)} ({np.mean(y_test_binary == 0)*100:.2f}%)")
print(f"Non-zeros: {np.sum(y_test_binary == 1)} ({np.mean(y_test_binary == 1)*100:.2f}%)")

# Get the exact output names from the model (tensor names look like
# "<layer_name>/<op>", so keep only the layer part).
output_names = [output.name.split('/')[0] for output in model.outputs]
print("\nModel output names:", output_names)

print("\n4. Starting training...")
history = train_hybrid_model(
    model=model,
    X_train=X_train_seq,
    y_train=y_train,
    X_test=X_test_seq,
    y_test=y_test,
    epochs=150,
    batch_size=512,
    folder_name=folder_name,
    min_output=min_val_scaled,
    max_output=max_val_scaled
)
# Test-set prediction, evaluation and result bookkeeping.
#
# Relies on names from earlier cells: model, X_test_seq, y_test, scaler_y,
# min_val_scaled, max_val_scaled, history, features, input_shape,
# X_train_seq, folder_name and evaluate_solarenergy_predictions.
print("\n5. Generating predictions...")
predictions = model.predict(X_test_seq)
classification_pred, regression_pred, final_pred = predictions

# Count out-of-range values BEFORE clipping. The original counted them after
# np.clip, so the upper-bound check could never fire, and it used 0 as the
# lower bound instead of the actual clip bound min_val_scaled.
regression_out_of_range = int(
    np.sum((regression_pred < min_val_scaled) | (regression_pred > max_val_scaled))
)
final_out_of_range = int(
    np.sum((final_pred < min_val_scaled) | (final_pred > max_val_scaled))
)

# Clip only the regression and final predictions (the classification output
# is left untouched).
regression_pred = np.clip(regression_pred, min_val_scaled, max_val_scaled)
final_pred = np.clip(final_pred, min_val_scaled, max_val_scaled)

# Inverse transform to get back to the original units
regression_pred_original = scaler_y.inverse_transform(regression_pred)
final_pred_original = scaler_y.inverse_transform(final_pred)
y_test_original = scaler_y.inverse_transform(y_test)

print("\n6. Evaluating model...")
# Evaluate the final (combined) predictions
metrics = evaluate_solarenergy_predictions(y_test_original, final_pred_original, folder_name=folder_name)

# Results dictionary with additional metrics for the hybrid model
val_final_losses = history.history['val_final_output_loss']
training_results = {
    'model_params': {
        'input_shape': input_shape,
        'n_features': len(features),
        'sequence_length': X_train_seq.shape[1]
    },
    'training_params': {
        # Must match the batch_size passed to train_hybrid_model (512); the
        # original recorded a stale value of 192.
        'batch_size': 512,
        'total_epochs': len(history.history['loss']),
        # int(...) keeps this a plain Python int rather than a NumPy integer.
        'best_epoch': int(np.argmin(val_final_losses)) + 1
    },
    'performance_metrics': {
        'classification': {
            'final_loss': float(history.history['val_classification_output_loss'][-1]),
            'final_auc': float(history.history['val_classification_output_auc'][-1])
        },
        'regression': {
            'final_loss': float(history.history['val_regression_output_loss'][-1]),
            'final_mae': float(history.history['val_regression_output_mae'][-1]),
            'out_of_range_predictions': regression_out_of_range
        },
        'final_output': {
            'final_loss': float(val_final_losses[-1]),
            'final_mae': float(history.history['val_final_output_mae'][-1]),
            'best_val_loss': float(min(val_final_losses)),
            'out_of_range_predictions': final_out_of_range
        }
    }
}
# Predict the rows with missing solar energy, write them back into the
# dataset and record summary statistics.
#
# Relies on names from earlier cells: model, X_to_predict_seq, scaler_y,
# min_val_scaled, max_val_scaled, df, integrate_predictions, training_results
# and tf.
print("\n7. Predicting missing data...")
to_predict_predictions = model.predict(X_to_predict_seq)
classification_pred, regression_pred, final_pred = to_predict_predictions

# Only the final predictions are clipped, since those are the ones that get
# integrated into the dataset.
final_pred = np.clip(final_pred, min_val_scaled, max_val_scaled)
final_pred_original = scaler_y.inverse_transform(final_pred)

print("\n8. Integrating predictions into original dataset...")
# NOTE(review): regression_pred is still in scaled space while
# final_pred_original is in original units - confirm this is what
# integrate_predictions expects.
integration_inputs = (classification_pred, regression_pred, final_pred_original)
df_updated = integrate_predictions(df.copy(), predictions=integration_inputs)

df_updated.to_parquet('../../sources/weather_data_solarenergy.parquet')

# Build each statistics group once, then attach them to training_results.
n_added = len(final_pred_original)

classification_stats = {
    'predicted_zeros': int(np.count_nonzero(classification_pred < 0.5)),
    'predicted_non_zeros': int(np.count_nonzero(classification_pred >= 0.5)),
    'mean_confidence': float(classification_pred.mean()),
}
regression_stats = {
    'mean_predicted_value': float(regression_pred.mean()),
    'min_predicted_value': float(regression_pred.min()),
    'max_predicted_value': float(regression_pred.max()),
}
final_stats = {
    'mean_predicted_solarenergy': float(final_pred_original.mean()),
    'min_predicted_solarenergy': float(final_pred_original.min()),
    'max_predicted_solarenergy': float(final_pred_original.max()),
    'zero_predictions': int(np.count_nonzero(final_pred_original == 0)),
    'non_zero_predictions': int(np.count_nonzero(final_pred_original > 0)),
}

training_results['prediction_stats'] = {
    'n_predictions_added': n_added,
    'classification_stats': classification_stats,
    'regression_stats': regression_stats,
    'final_predictions': final_stats,
}

print("\nPrediction Statistics:")
print(f"Total predictions added: {n_added}")

print("\nClassification Statistics:")
zeros_pct = classification_stats['predicted_zeros'] / n_added * 100
non_zeros_pct = classification_stats['predicted_non_zeros'] / n_added * 100
print(f"Predicted zeros: {classification_stats['predicted_zeros']} "
      f"({zeros_pct:.2f}%)")
print(f"Predicted non-zeros: {classification_stats['predicted_non_zeros']} "
      f"({non_zeros_pct:.2f}%)")
print(f"Mean classification confidence: {classification_stats['mean_confidence']:.4f}")

print("\nFinal Predictions Statistics:")
print(f"Mean solar energy: {final_stats['mean_predicted_solarenergy']:.2f}")
print(f"Min solar energy: {final_stats['min_predicted_solarenergy']:.2f}")
print(f"Max solar energy: {final_stats['max_predicted_solarenergy']:.2f}")
zero_final_pct = final_stats['zero_predictions'] / n_added * 100
print(f"Zero predictions: {final_stats['zero_predictions']} "
      f"({zero_final_pct:.2f}%)")

print("\nTraining completed successfully!")

# Release the Keras/TensorFlow global state held by this session.
tf.keras.backend.clear_session()
def plot_error_analysis(y_true, predictions, folder_name=None, thresholds=(0.5, 1.0, 1.5, 2.0)):
    """
    Visualize and summarize prediction errors for the hybrid model.

    Parameters:
    -----------
    y_true : array-like
        Actual values. Flattened to 1-D; values > 0 are treated as the
        positive ("non-zero") class.
    predictions : tuple
        Tuple containing (classification_pred, regression_pred, final_pred).
        Each element is flattened to 1-D. Errors are computed as
        prediction - y_true, so regression_pred and final_pred must be in the
        same units as y_true.
    folder_name : str, optional
        Prefix for the saved figure: the plot is written to
        '{folder_name}_error_analysis.png'. If None, the figure is only
        displayed.
    thresholds : sequence of float, optional
        Absolute-error thresholds (same units as y_true) for which the share
        of final predictions within +/- threshold is printed.

    Generates:
    ----------
    - Classification analysis plots (probability histogram, ROC, confusion matrix)
    - Regression error analysis plots (restricted to samples with y_true > 0)
    - Final prediction error analysis plots
    - Printed classification report, AUC and MAE / RMSE / mean / std of errors

    Notes:
    ------
    Uses the module-level names np, plt and sns.
    """
    # All sklearn helpers are imported locally so the function does not depend
    # on imports made by some other cell.
    from sklearn.metrics import (
        classification_report,
        confusion_matrix,
        roc_auc_score,
        roc_curve,
    )

    # Convert everything to 1-D numpy arrays
    y_true = np.ravel(y_true)
    classification_pred, regression_pred, final_pred = (np.ravel(p) for p in predictions)

    # Binary ground truth and the hard decision at the 0.5 probability cut-off
    y_true_binary = (y_true > 0).astype(float)
    predicted_positive = classification_pred > 0.5

    mask_nonzero = y_true > 0
    has_nonzero = bool(np.any(mask_nonzero))

    # Signed errors for regression and final predictions
    regression_errors = regression_pred - y_true
    final_errors = final_pred - y_true

    # roc_auc_score raises when y_true contains a single class, so compute it
    # once, and only when it is defined.
    has_both_classes = np.unique(y_true_binary).size > 1
    auc = roc_auc_score(y_true_binary, classification_pred) if has_both_classes else None

    def _mark_unavailable(ax, reason):
        # Put an explanatory label in the middle of a panel that cannot be drawn.
        ax.text(0.5, 0.5, reason, ha='center', va='center', transform=ax.transAxes)

    fig, axes = plt.subplots(3, 3, figsize=(20, 15))

    # ---- Classification analysis (top row) ----
    # Plot 1: classification probability distribution
    ax = axes[0, 0]
    ax.hist(classification_pred, bins=50, alpha=0.7)
    ax.axvline(x=0.5, color='r', linestyle='--')
    ax.set_title('Classification Probability Distribution')
    ax.set_xlabel('Classification Probability')
    ax.set_ylabel('Frequency')

    # Plot 2: ROC curve
    ax = axes[0, 1]
    if has_both_classes:
        fpr, tpr, _ = roc_curve(y_true_binary, classification_pred)
        ax.plot(fpr, tpr)
        ax.plot([0, 1], [0, 1], 'r--')
        ax.set_title(f'ROC Curve (AUC = {auc:.4f})')
    else:
        _mark_unavailable(ax, 'ROC undefined: only one class in y_true')
        ax.set_title('ROC Curve')
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')

    # Plot 3: confusion matrix
    ax = axes[0, 2]
    cm = confusion_matrix(y_true_binary, predicted_positive)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title('Classification Confusion Matrix')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')

    # ---- Regression analysis (middle row, non-zero targets only) ----
    # Plot 4: regression error distribution
    ax = axes[1, 0]
    if has_nonzero:
        ax.hist(regression_errors[mask_nonzero], bins=50, alpha=0.7)
    else:
        _mark_unavailable(ax, 'No non-zero target values')
    ax.set_title('Regression Error Distribution (Non-zero Values)')
    ax.set_xlabel('Error')
    ax.set_ylabel('Frequency')

    # Plot 5: actual vs predicted (regression)
    ax = axes[1, 1]
    if has_nonzero:
        actual_nonzero = y_true[mask_nonzero]
        diagonal = [actual_nonzero.min(), actual_nonzero.max()]
        ax.scatter(actual_nonzero, regression_pred[mask_nonzero], alpha=0.5)
        ax.plot(diagonal, diagonal, 'r--', lw=2)
    else:
        # Without this guard min()/max() on the empty selection would raise.
        _mark_unavailable(ax, 'No non-zero target values')
    ax.set_title('Actual vs Predicted (Regression, Non-zero Values)')
    ax.set_xlabel('Actual Values')
    ax.set_ylabel('Predicted Values')

    # Plot 6: regression errors vs actual values
    ax = axes[1, 2]
    if has_nonzero:
        ax.scatter(y_true[mask_nonzero], regression_errors[mask_nonzero], alpha=0.5)
        ax.axhline(y=0, color='r', linestyle='--')
    else:
        _mark_unavailable(ax, 'No non-zero target values')
    ax.set_title('Regression Errors vs Actual Values (Non-zero Values)')
    ax.set_xlabel('Actual Values')
    ax.set_ylabel('Error')

    # ---- Final predictions analysis (bottom row) ----
    # Plot 7: final error distribution
    ax = axes[2, 0]
    ax.hist(final_errors, bins=50, alpha=0.7)
    ax.set_title('Final Prediction Error Distribution')
    ax.set_xlabel('Error')
    ax.set_ylabel('Frequency')

    # Plot 8: actual vs predicted (final)
    ax = axes[2, 1]
    ax.scatter(y_true, final_pred, alpha=0.5)
    ax.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'r--', lw=2)
    ax.set_title('Actual vs Predicted (Final)')
    ax.set_xlabel('Actual Values')
    ax.set_ylabel('Predicted Values')

    # Plot 9: final errors vs actual values
    ax = axes[2, 2]
    ax.scatter(y_true, final_errors, alpha=0.5)
    ax.axhline(y=0, color='r', linestyle='--')
    ax.set_title('Final Errors vs Actual Values')
    ax.set_xlabel('Actual Values')
    ax.set_ylabel('Error')

    fig.tight_layout()

    # Save the figure if a prefix was given. Saving is best-effort: a failure
    # is reported but does not prevent the plot and statistics from showing.
    if folder_name is not None:
        try:
            filename = f'{folder_name}_error_analysis.png'
            fig.savefig(filename, dpi=300, bbox_inches='tight')
            print(f"\nPlot saved as: {filename}")
        except Exception as e:
            print(f"\nError saving plot: {str(e)}")

    plt.show()

    # ---- Printed statistics ----
    print("\nClassification Statistics:")
    print(classification_report(y_true_binary, predicted_positive))
    if auc is not None:
        print(f"AUC-ROC: {auc:.4f}")
    else:
        print("AUC-ROC: undefined (only one class present in y_true)")

    print("\nRegression Statistics (Non-zero values):")
    if has_nonzero:
        nonzero_errors = regression_errors[mask_nonzero]
        print(f"MAE: {np.mean(np.abs(nonzero_errors)):.4f}")
        print(f"RMSE: {np.sqrt(np.mean(nonzero_errors ** 2)):.4f}")
        print(f"Mean error: {np.mean(nonzero_errors):.4f}")
        print(f"Error std: {np.std(nonzero_errors):.4f}")

    print("\nFinal Prediction Statistics:")
    print(f"MAE: {np.mean(np.abs(final_errors)):.4f}")
    print(f"RMSE: {np.sqrt(np.mean(final_errors ** 2)):.4f}")
    print(f"Mean error: {np.mean(final_errors):.4f}")
    print(f"Error std: {np.std(final_errors):.4f}")

    # Percentage of final predictions whose absolute error is within each threshold
    print("\nError Thresholds (Final Predictions):")
    for threshold in thresholds:
        within_threshold = np.mean(np.abs(final_errors) <= threshold) * 100
        print(f"Predictions within ±{threshold}: {within_threshold:.1f}%")

# Example usage
# NOTE(review): y_test and `predictions` are both in scaler_y's transformed
# space here (an earlier cell inverse-transforms them separately), so the
# default thresholds are applied to scaled values - confirm that is intended.
plot_error_analysis(y_test, predictions, folder_name=folder_name)
"codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0rc1" } }, "nbformat": 4, "nbformat_minor": 5 }