olive-oil-transformer-model/models/solarenergy/.ipynb_checkpoints/solarenergy_model_v1-checkpoint.ipynb

2987 lines
389 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "8adcbe0819b88578",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hit:1 http://security.ubuntu.com/ubuntu jammy-security InRelease\n",
"Hit:2 http://archive.ubuntu.com/ubuntu jammy InRelease\n",
"Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease\n",
"Hit:4 http://archive.ubuntu.com/ubuntu jammy-updates InRelease\n",
"Hit:5 http://archive.ubuntu.com/ubuntu jammy-backports InRelease\n",
"Reading package lists... Done\n",
"Reading package lists... Done\n",
"Building dependency tree... Done\n",
"Reading state information... Done\n",
"graphviz is already the newest version (2.42.2-6ubuntu0.1).\n",
"0 upgraded, 0 newly installed, 0 to remove and 121 not upgraded.\n",
"Requirement already satisfied: tensorflow in /usr/local/lib/python3.11/dist-packages (2.14.0)\n",
"Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (2.0.0)\n",
"Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (1.6.3)\n",
"Requirement already satisfied: flatbuffers>=23.5.26 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (23.5.26)\n",
"Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (0.5.4)\n",
"Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (0.2.0)\n",
"Requirement already satisfied: h5py>=2.9.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (3.9.0)\n",
"Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (16.0.6)\n",
"Requirement already satisfied: ml-dtypes==0.2.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (0.2.0)\n",
"Requirement already satisfied: numpy>=1.23.5 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (1.26.0)\n",
"Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (3.3.0)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from tensorflow) (23.1)\n",
"Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (4.24.3)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from tensorflow) (68.2.2)\n",
"Requirement already satisfied: six>=1.12.0 in /usr/lib/python3/dist-packages (from tensorflow) (1.16.0)\n",
"Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (2.3.0)\n",
"Requirement already satisfied: typing-extensions>=3.6.6 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (4.8.0)\n",
"Requirement already satisfied: wrapt<1.15,>=1.11.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (1.14.1)\n",
"Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (0.37.1)\n",
"Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (1.58.0)\n",
"Requirement already satisfied: tensorboard<2.15,>=2.14 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (2.14.0)\n",
"Requirement already satisfied: tensorflow-estimator<2.15,>=2.14.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (2.14.0)\n",
"Requirement already satisfied: keras<2.15,>=2.14.0 in /usr/local/lib/python3.11/dist-packages (from tensorflow) (2.14.0)\n",
"Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from astunparse>=1.6.0->tensorflow) (0.41.2)\n",
"Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.11/dist-packages (from tensorboard<2.15,>=2.14->tensorflow) (2.23.1)\n",
"Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.11/dist-packages (from tensorboard<2.15,>=2.14->tensorflow) (1.0.0)\n",
"Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.11/dist-packages (from tensorboard<2.15,>=2.14->tensorflow) (3.4.4)\n",
"Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.11/dist-packages (from tensorboard<2.15,>=2.14->tensorflow) (2.31.0)\n",
"Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from tensorboard<2.15,>=2.14->tensorflow) (0.7.1)\n",
"Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from tensorboard<2.15,>=2.14->tensorflow) (2.3.7)\n",
"Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow) (5.3.1)\n",
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.11/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow) (0.3.0)\n",
"Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.11/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow) (4.9)\n",
"Requirement already satisfied: urllib3>=2.0.5 in /usr/local/lib/python3.11/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow) (2.0.5)\n",
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard<2.15,>=2.14->tensorflow) (1.3.1)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.21.0->tensorboard<2.15,>=2.14->tensorflow) (3.2.0)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.21.0->tensorboard<2.15,>=2.14->tensorflow) (3.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.21.0->tensorboard<2.15,>=2.14->tensorflow) (2023.7.22)\n",
"Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.11/dist-packages (from werkzeug>=1.0.1->tensorboard<2.15,>=2.14->tensorflow) (2.1.3)\n",
"Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.11/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow) (0.5.0)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/lib/python3/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard<2.15,>=2.14->tensorflow) (3.2.0)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (1.26.0)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (2.2.3)\n",
"Requirement already satisfied: numpy>=1.23.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (1.26.0)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas) (2024.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas) (2024.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: keras in /usr/local/lib/python3.11/dist-packages (2.14.0)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (1.5.2)\n",
"Requirement already satisfied: numpy>=1.19.5 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.26.0)\n",
"Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.14.1)\n",
"Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.4.2)\n",
"Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (3.5.0)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (3.8.0)\n",
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.1.1)\n",
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (0.11.0)\n",
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (4.42.1)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.4.5)\n",
"Requirement already satisfied: numpy<2,>=1.21 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.26.0)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (23.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (10.0.1)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (3.2.0)\n",
"Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (2.8.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (1.4.2)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: pyarrow in /usr/local/lib/python3.11/dist-packages (18.1.0)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: fastparquet in /usr/local/lib/python3.11/dist-packages (2024.11.0)\n",
"Requirement already satisfied: pandas>=1.5.0 in /usr/local/lib/python3.11/dist-packages (from fastparquet) (2.2.3)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from fastparquet) (1.26.0)\n",
"Requirement already satisfied: cramjam>=2.3 in /usr/local/lib/python3.11/dist-packages (from fastparquet) (2.9.0)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from fastparquet) (2024.10.0)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from fastparquet) (23.1)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.5.0->fastparquet) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.5.0->fastparquet) (2024.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.5.0->fastparquet) (2024.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas>=1.5.0->fastparquet) (1.16.0)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (1.14.1)\n",
"Requirement already satisfied: numpy<2.3,>=1.23.5 in /usr/local/lib/python3.11/dist-packages (from scipy) (1.26.0)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: seaborn in /usr/local/lib/python3.11/dist-packages (0.13.2)\n",
"Requirement already satisfied: numpy!=1.24.0,>=1.20 in /usr/local/lib/python3.11/dist-packages (from seaborn) (1.26.0)\n",
"Requirement already satisfied: pandas>=1.2 in /usr/local/lib/python3.11/dist-packages (from seaborn) (2.2.3)\n",
"Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in /usr/local/lib/python3.11/dist-packages (from seaborn) (3.8.0)\n",
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.1.1)\n",
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.11.0)\n",
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.42.1)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.5)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (23.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (10.0.1)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.2.0)\n",
"Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.2->seaborn) (2024.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas>=1.2->seaborn) (2024.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.16.0)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (4.67.1)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: pydot in /usr/local/lib/python3.11/dist-packages (3.0.2)\n",
"Requirement already satisfied: pyparsing>=3.0.9 in /usr/local/lib/python3.11/dist-packages (from pydot) (3.2.0)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: tensorflow-io in /usr/local/lib/python3.11/dist-packages (0.37.1)\n",
"Requirement already satisfied: tensorflow-io-gcs-filesystem==0.37.1 in /usr/local/lib/python3.11/dist-packages (from tensorflow-io) (0.37.1)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n",
"Requirement already satisfied: tensorflow-addons in /usr/local/lib/python3.11/dist-packages (0.23.0)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from tensorflow-addons) (23.1)\n",
"Requirement already satisfied: typeguard<3.0.0,>=2.7 in /usr/local/lib/python3.11/dist-packages (from tensorflow-addons) (2.13.3)\n",
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n"
]
}
],
"source": [
"# System dependency: Graphviz is the rendering backend used through pydot\n",
"# (needed for keras.utils.plot_model, which is imported in the next cell).\n",
"!apt-get update\n",
"!apt-get install graphviz -y\n",
"\n",
"# Use %pip rather than !pip so packages are installed into the environment of\n",
"# the running kernel. Versions are pinned to the ones reported by the last\n",
"# recorded run of this cell so that a fresh environment reproduces it; -q keeps\n",
"# the 'Requirement already satisfied' noise out of the saved notebook.\n",
"%pip install -q tensorflow==2.14.0\n",
"%pip install -q numpy==1.26.0\n",
"%pip install -q pandas==2.2.3\n",
"\n",
"%pip install -q keras==2.14.0\n",
"%pip install -q scikit-learn==1.5.2\n",
"%pip install -q matplotlib==3.8.0\n",
"%pip install -q joblib==1.4.2\n",
"%pip install -q pyarrow==18.1.0\n",
"%pip install -q fastparquet==2024.11.0\n",
"%pip install -q scipy==1.14.1\n",
"%pip install -q seaborn==0.13.2\n",
"%pip install -q tqdm==4.67.1\n",
"%pip install -q pydot==3.0.2\n",
"%pip install -q tensorflow-io==0.37.1\n",
"%pip install -q tensorflow-addons==0.23.0"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e6fe6bb613168a8a",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-11-27 23:17:43.475455: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
"2024-11-27 23:17:43.475499: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
"2024-11-27 23:17:43.475533: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"2024-11-27 23:17:43.483362: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"/usr/local/lib/python3.11/dist-packages/tensorflow_addons/utils/tfa_eol_msg.py:23: UserWarning: \n",
"\n",
"TensorFlow Addons (TFA) has ended development and introduction of new features.\n",
"TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.\n",
"Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). \n",
"\n",
"For more information see: https://github.com/tensorflow/addons/issues/2807 \n",
"\n",
" warnings.warn(\n"
]
}
],
"source": [
"import tensorflow as tf\n",
"from tensorflow.keras.layers import (\n",
" Dense, LSTM, MultiHeadAttention, Dropout, BatchNormalization, \n",
" LayerNormalization, Input, Activation, Lambda, Bidirectional, \n",
" Add, MaxPooling1D, SpatialDropout1D, GlobalAveragePooling1D,\n",
" GlobalMaxPooling1D, Concatenate, ThresholdedReLU, Average,\n",
" Conv1D, Multiply\n",
")\n",
"from tensorflow.keras import regularizers\n",
"from tensorflow.keras.models import Model\n",
"from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau\n",
"from tensorflow.keras.optimizers import AdamW\n",
"from tensorflow.keras.metrics import AUC\n",
"from tensorflow.keras.utils import plot_model\n",
"\n",
"# Data processing and analysis\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import RobustScaler\n",
"from sklearn.metrics import (\n",
" mean_absolute_error, mean_squared_error, r2_score, \n",
" confusion_matrix, classification_report, roc_auc_score\n",
")\n",
"\n",
"# Visualization\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Additional utilities\n",
"import tensorflow_addons as tfa\n",
"from scipy import stats\n",
"import json\n",
"from datetime import datetime\n",
"import os\n",
"import joblib\n",
"\n",
"# Minute-resolution timestamp label, evaluated once when this cell runs.\n",
"folder_name = f\"{datetime.now():%Y-%m-%d_%H-%M}\"\n",
"\n",
"# NOTE(review): presumably forwarded as a random_state/seed further down;\n",
"# None would mean no fixed seed, i.e. non-reproducible runs - confirm.\n",
"random_state_value = None"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "3da8b15c7eb9833f",
"metadata": {},
"outputs": [],
"source": [
"def get_season(date):\n",
"    \"\"\"Return the season name for a date.\n",
"\n",
"    Boundaries used: Spring starts on 20 March, Summer on 21 June, Autumn on\n",
"    23 September and Winter on 21 December (running through 19 March).\n",
"\n",
"    Args:\n",
"        date: any object exposing integer ``month`` and ``day`` attributes\n",
"            (datetime.date, datetime.datetime, pandas.Timestamp, ...).\n",
"\n",
"    Returns:\n",
"        One of 'Winter', 'Spring', 'Summer', 'Autumn'.\n",
"    \"\"\"\n",
"    # Compare (month, day) pairs lexicographically. Testing month and day\n",
"    # independently (month <= 3 and day < 20) applies the day-of-month cut-off\n",
"    # to every month of the range, which misclassifies e.g. 25 January.\n",
"    month_day = (date.month, date.day)\n",
"\n",
"    if month_day < (3, 20) or month_day >= (12, 21):\n",
"        return 'Winter'\n",
"    if month_day < (6, 21):\n",
"        return 'Spring'\n",
"    if month_day < (9, 23):\n",
"        return 'Summer'\n",
"    return 'Autumn'\n",
"\n",
"\n",
"def get_time_period(hour):\n",
"    \"\"\"Map an hour of the day to a coarse part-of-day label.\n",
"\n",
"    Returns 'Morning' for 5 <= hour < 12, 'Afternoon' for 12 <= hour < 17,\n",
"    'Evening' for 17 <= hour < 21 and 'Night' for everything else.\n",
"    \"\"\"\n",
"    # (inclusive start, exclusive end, label), checked in order.\n",
"    day_parts = (\n",
"        (5, 12, 'Morning'),\n",
"        (12, 17, 'Afternoon'),\n",
"        (17, 21, 'Evening'),\n",
"    )\n",
"    for first_hour, end_hour, label in day_parts:\n",
"        if first_hour <= hour < end_hour:\n",
"            return label\n",
"    return 'Night'\n",
"\n",
"\n",
"def add_time_features(df):\n",
"    \"\"\"Derive calendar, cyclical and categorical time features from df['datetime'].\n",
"\n",
"    The frame is modified in place ('datetime' itself is re-parsed with\n",
"    pd.to_datetime) and the same object is returned.\n",
"\n",
"    Args:\n",
"        df: DataFrame with a 'datetime' column that pd.to_datetime can parse.\n",
"\n",
"    Returns:\n",
"        df, with these columns added: timestamp (epoch seconds), year, month,\n",
"        day, hour, minute, hour_sin/cos, day_of_week, day_of_year,\n",
"        week_of_year, quarter, is_month_end, is_quarter_end, is_year_end,\n",
"        month_sin/cos, day_of_year_sin/cos, season, time_period.\n",
"    \"\"\"\n",
"    df['datetime'] = pd.to_datetime(df['datetime'])\n",
"    when = df['datetime'].dt\n",
"\n",
"    # Epoch seconds. The integer view of a datetime column is expressed in the\n",
"    # column's own resolution, so normalise to nanoseconds before dividing by\n",
"    # 1e9; s/ms/us-resolution input would otherwise be scaled incorrectly.\n",
"    df['timestamp'] = when.as_unit('ns').astype(np.int64) // 10 ** 9\n",
"\n",
"    # Plain calendar components.\n",
"    df['year'] = when.year\n",
"    df['month'] = when.month\n",
"    df['day'] = when.day\n",
"    df['hour'] = when.hour\n",
"    df['minute'] = when.minute\n",
"\n",
"    # Hour of day on the unit circle (period: 24).\n",
"    df['hour_sin'] = np.sin(df['hour'] * (2 * np.pi / 24))\n",
"    df['hour_cos'] = np.cos(df['hour'] * (2 * np.pi / 24))\n",
"\n",
"    df['day_of_week'] = when.dayofweek\n",
"    df['day_of_year'] = when.dayofyear\n",
"    df['week_of_year'] = when.isocalendar().week.astype(int)\n",
"    df['quarter'] = when.quarter\n",
"\n",
"    # Period-boundary flags stored as 0/1 integers.\n",
"    df['is_month_end'] = when.is_month_end.astype(int)\n",
"    df['is_quarter_end'] = when.is_quarter_end.astype(int)\n",
"    df['is_year_end'] = when.is_year_end.astype(int)\n",
"\n",
"    # Month (period: 12) and day of year (period: 365.25) on the unit circle.\n",
"    df['month_sin'] = np.sin(df['month'] * (2 * np.pi / 12))\n",
"    df['month_cos'] = np.cos(df['month'] * (2 * np.pi / 12))\n",
"    df['day_of_year_sin'] = np.sin(df['day_of_year'] * (2 * np.pi / 365.25))\n",
"    df['day_of_year_cos'] = np.cos(df['day_of_year'] * (2 * np.pi / 365.25))\n",
"\n",
"    # Categorical labels computed row by row with the helpers defined above.\n",
"    df['season'] = df['datetime'].apply(get_season)\n",
"    df['time_period'] = df['hour'].apply(get_time_period)\n",
"    return df\n",
"\n",
"\n",
"def add_solar_features(df):\n",
"    \"\"\"Append radiation-, cloud- and temperature-derived columns to df.\n",
"\n",
"    Reads day_of_year, hour, solarradiation, cloudcover, temp, visibility and\n",
"    tempmin. The frame is modified in place and returned.\n",
"    \"\"\"\n",
"    radiation = df['solarradiation']\n",
"    cloud_pct = df['cloudcover']\n",
"    clear_pct = 100 - cloud_pct\n",
"\n",
"    # Product of a yearly and a daily sine wave, built from time columns only.\n",
"    yearly_wave = np.sin(df['day_of_year'] * (2 * np.pi / 365.25))\n",
"    daily_wave = np.sin(df['hour'] * (2 * np.pi / 24))\n",
"    df['solar_elevation'] = yearly_wave * daily_wave\n",
"\n",
"    # Radiation scaled by the cloud-free share of the sky.\n",
"    df['radiation_clearsky'] = radiation * clear_pct / 100\n",
"\n",
"    # Linear derating of 0.004 per degree of temp above 25.\n",
"    df['temp_efficiency_factor'] = 1 - 0.004 * (df['temp'] - 25)\n",
"\n",
"    # Pairwise interaction terms.\n",
"    df['cloud_impact'] = cloud_pct * radiation\n",
"    df['visibility_radiation'] = df['visibility'] * radiation\n",
"    df['clear_sky_index'] = clear_pct / 100\n",
"    df['temp_effect'] = df['temp'] - df['tempmin']\n",
"\n",
"    return df\n",
"\n",
"def add_solar_specific_features(df):\n",
"    \"\"\"\n",
"    Add features specific to solar-radiation prediction, combining\n",
"    astronomical terms with weather interactions.\n",
"\n",
"    Reads day_of_year, hour, hour_sin, day_of_year_sin, cloudcover,\n",
"    visibility, uvindex, pressure, humidity and temp. The frame is modified\n",
"    in place and returned.\n",
"    \"\"\"\n",
"    cloud_pct = df['cloudcover']\n",
"    clear_pct = 100 - cloud_pct\n",
"    visibility = df['visibility']\n",
"\n",
"    # Astronomical terms\n",
"    df['day_length'] = 12 + 3 * np.sin(2 * np.pi * (df['day_of_year'] - 81) / 365.25)\n",
"    hours_from_noon = (12 - df['hour']).abs()\n",
"    df['solar_noon'] = hours_from_noon\n",
"    # Overwrites any 'solar_elevation' column already present (add_solar_features\n",
"    # writes one under the same name with a different formula).\n",
"    elevation = np.sin(2 * np.pi * df['day_of_year'] / 365.25) * np.cos(2 * np.pi * hours_from_noon / 24)\n",
"    df['solar_elevation'] = elevation\n",
"\n",
"    # Theoretical solar angle\n",
"    # NOTE(review): this takes the sine of columns that are already sines -\n",
"    # confirm that is intended.\n",
"    solar_angle = np.sin(df['hour_sin']) * np.sin(df['day_of_year_sin'])\n",
"    df['solar_angle'] = solar_angle\n",
"\n",
"    # Interactions with atmospheric conditions\n",
"    df['cloud_elevation'] = cloud_pct * elevation\n",
"    df['visibility_elevation'] = visibility * elevation\n",
"    df['uv_cloud_interaction'] = df['uvindex'] * clear_pct / 100\n",
"\n",
"    # Clearness and transmission indices\n",
"    clearness = clear_pct * visibility / 10000\n",
"    df['clearness_index'] = clearness\n",
"    df['atmospheric_attenuation'] = (df['pressure'] / 1013.25) * (1 - (df['humidity'] / 100) * 0.6)\n",
"\n",
"    # Theoretical radiation and its attenuated counterpart\n",
"    theoretical = solar_angle.clip(lower=0, upper=1) * 1000\n",
"    df['theoretical_radiation'] = theoretical\n",
"    df['expected_radiation'] = theoretical * clearness\n",
"\n",
"    # 12-row rolling means (NaN until 12 observations are available)\n",
"    rolling_sources = (\n",
"        ('cloud_rolling_12h', 'cloudcover'),\n",
"        ('temp_rolling_12h', 'temp'),\n",
"        ('uv_rolling_12h', 'uvindex'),\n",
"    )\n",
"    for target, source in rolling_sources:\n",
"        df[target] = df[source].rolling(window=12).mean()\n",
"\n",
"    # Temperature / elevation interaction\n",
"    df['temp_radiation_potential'] = df['temp'] * elevation\n",
"\n",
"    return df\n",
"\n",
"def add_radiation_energy_features(df):\n",
" \"\"\"Adds specific features based on solarenergy and uvindex\"\"\"\n",
"\n",
" # Solar energy to UV ratio (independent from solarradiation)\n",
" df['energy_uv_ratio'] = df['solarenergy'] / (df['uvindex'] + 1e-6)\n",
"\n",
" # Time aggregations\n",
" # Moving averages\n",
" windows = [3, 6, 12, 24] # hours\n",
" for w in windows:\n",
" df[f'energy_rolling_mean_{w}h'] = df['solarenergy'].rolling(window=w).mean()\n",
" df[f'uv_rolling_mean_{w}h'] = df['uvindex'].rolling(window=w).mean()\n",
"\n",
" # Daily aggregations utilizzando datetime\n",
" df['energy_daily_sum'] = df.groupby(df['datetime'].dt.date)['solarenergy'].transform('sum')\n",
" df['uv_daily_max'] = df.groupby(df['datetime'].dt.date)['uvindex'].transform('max')\n",
"\n",
" # Changes\n",
" df['energy_change'] = df['solarenergy'].diff()\n",
" df['uv_change'] = df['uvindex'].diff()\n",
"\n",
" # Lag features\n",
" lags = [1, 2, 3, 6, 12, 24] # hours\n",
" for lag in lags:\n",
" df[f'energy_lag_{lag}h'] = df['solarenergy'].shift(lag)\n",
" df[f'uv_lag_{lag}h'] = df['uvindex'].shift(lag)\n",
"\n",
" # Peak indicators\n",
" df['is_energy_peak'] = (df['solarenergy'] > df['energy_rolling_mean_6h'] * 1.2).astype(int)\n",
" df['is_uv_peak'] = (df['uvindex'] > df['uv_rolling_mean_6h'] * 1.2).astype(int)\n",
"\n",
" # Aggiungiamo alcune metriche di volatilità\n",
" df['energy_volatility'] = df['energy_change'].rolling(window=24).std()\n",
" df['uv_volatility'] = df['uv_change'].rolling(window=24).std()\n",
"\n",
" # Indice di intensità solare composito\n",
" df['solar_intensity_index'] = (df['solarenergy'] * df['uvindex']) / (df['cloudcover'] + 1e-6)\n",
"\n",
" # Interazioni\n",
" df['uv_cloud_interaction'] = df['uvindex'] * (100 - df['cloudcover']) / 100\n",
" df['energy_temp_interaction'] = df['solarenergy'] * df['temp']\n",
"\n",
" return df\n",
"\n",
"def add_atmospheric_features(df):\n",
" # Indice di Massa d'Aria (Air Mass Index)\n",
" # Rappresenta il percorso ottico relativo dei raggi solari attraverso l'atmosfera\n",
" df['air_mass_index'] = 1 / (np.cos(np.radians(90 - df['solar_elevation'])) + 0.50572 *\n",
" (96.07995 - (90 - df['solar_elevation']))**-1.6364)\n",
"\n",
" # Indice di Stabilità Atmosferica\n",
" # Combina temperatura, umidità e pressione\n",
" df['atmospheric_stability'] = (df['temp'] * (100 - df['humidity'])) / df['pressure']\n",
"\n",
" # Vapor Pressure Deficit (VPD)\n",
" # Importante per la radiazione diffusa\n",
" df['saturation_vapor_pressure'] = 0.6108 * np.exp(17.27 * df['temp'] / (df['temp'] + 237.3))\n",
" df['actual_vapor_pressure'] = df['saturation_vapor_pressure'] * (df['humidity'] / 100)\n",
" df['vapor_pressure_deficit'] = df['saturation_vapor_pressure'] - df['actual_vapor_pressure']\n",
"\n",
" return df\n",
"\n",
"def add_diffusion_features(df):\n",
" # Indice di Diffusione\n",
" df['diffusion_index'] = (df['cloudcover'] * df['humidity']) / 10000\n",
"\n",
" # Radiazione Diretta vs Diffusa\n",
" df['direct_radiation'] = df['solarradiation'] * (1 - df['diffusion_index'])\n",
" df['diffuse_radiation'] = df['solarradiation'] * df['diffusion_index']\n",
"\n",
" # Fattore di Trasparenza Atmosferica\n",
" df['atmospheric_transmittance'] = (1 - df['cloudcover']/100) * (df['visibility']/10) * (1 - df['humidity']/200)\n",
"\n",
" return df\n",
"\n",
"def calculate_trend(x):\n",
" try:\n",
" return np.polyfit(np.arange(len(x)), x, 1)[0]\n",
" except:\n",
" return np.nan\n",
"\n",
"def add_persistence_features(df):\n",
" # Create a copy to avoid modifying the original dataframe\n",
" df = df.copy()\n",
"\n",
" # Calculate trends more efficiently\n",
" windows = [3, 6, 12, 24]\n",
" for w in windows:\n",
" # Use numba or vectorized operations if possible\n",
" df[f'radiation_trend_{w}h'] = df['solarradiation'].rolling(\n",
" window=w,\n",
" min_periods=w\n",
" ).apply(calculate_trend, raw=True)\n",
"\n",
" # Optimize volatility calculation by doing it in one pass\n",
" rolling_24 = df['solarradiation'].rolling(24, min_periods=1)\n",
" df['radiation_volatility'] = rolling_24.std() / rolling_24.mean().clip(lower=1e-10)\n",
"\n",
" return df\n",
"\n",
"def add_weather_pattern_features(df):\n",
" # Pattern giornalieri\n",
" df['clear_sky_duration'] = df.groupby(df['datetime'].dt.date)['cloudcover'].transform(\n",
" lambda x: (x < 30).sum()\n",
" )\n",
"\n",
" # Stabilità delle condizioni\n",
" for col in ['temp', 'humidity', 'cloudcover']:\n",
" df[f'{col}_stability'] = df[col].rolling(12).std() / df[col].rolling(12).mean()\n",
"\n",
" # Indice di Variabilità Meteorologica\n",
" df['weather_variability_index'] = (df['temp_stability'] +\n",
" df['humidity_stability'] +\n",
" df['cloudcover_stability']) / 3\n",
"\n",
" return df\n",
"\n",
"def add_efficiency_features(df):\n",
" # Perdite per temperatura\n",
" df['temp_losses'] = 0.004 * (df['temp'] - 25).clip(lower=0) # 0.4% per grado sopra 25°C\n",
"\n",
" # Perdite per polvere/sporco (stima basata su umidità e pressione)\n",
" df['soiling_loss_factor'] = 0.002 * (df['humidity']/100) * (df['pressure']/1013.25)\n",
"\n",
" # Efficienza complessiva stimata\n",
" df['estimated_efficiency'] = (1 - df['temp_losses']) * (1 - df['soiling_loss_factor']) * \\\n",
" df['atmospheric_transmittance']\n",
"\n",
" # Potenziale di produzione\n",
" df['production_potential'] = df['solarradiation'] * df['estimated_efficiency']\n",
"\n",
" return df\n",
"\n",
"def add_advanced_seasonal_features(df):\n",
" # Differenza dalla durata media del giorno\n",
" avg_day_length = 12\n",
" df['day_length_deviation'] = df['day_length'] - avg_day_length\n",
"\n",
" # Intensità stagionale\n",
" df['seasonal_intensity'] = np.sin(2 * np.pi * (df['day_of_year'] - 172) / 365.25)\n",
"\n",
" # Indice di Stagionalità\n",
" df['seasonality_index'] = df['seasonal_intensity'] * df['solar_elevation']\n",
"\n",
" # Correzione per alba/tramonto\n",
" df['daylight_correction'] = np.where(\n",
" (df['hour'] >= df['day_length']) | (df['hour'] <= 24-df['day_length']),\n",
" 0,\n",
" 1\n",
" )\n",
"\n",
" return df\n",
"\n",
"def add_basic_interactions(df):\n",
" \"\"\"\n",
" Aggiunge le interazioni base tra variabili meteorologiche\n",
" \"\"\"\n",
" # Feature esistenti originali\n",
" df['temp_humidity'] = df['temp'] * df['humidity']\n",
" df['temp_cloudcover'] = df['temp'] * df['cloudcover']\n",
" df['visibility_cloudcover'] = df['visibility'] * df['cloudcover']\n",
" df['temp_humidity_interaction'] = df['temp'] * df['humidity'] / 100\n",
"\n",
" # Clear sky e trasparenza atmosferica\n",
" df['clear_sky_factor'] = (100 - df['cloudcover']) / 100\n",
" df['atmospheric_transparency'] = (100 - df['cloudcover']) * (df['visibility'] / 10)\n",
"\n",
" return df\n",
"\n",
"def add_rolling_and_lag_features(df):\n",
" \"\"\"\n",
" Aggiunge feature rolling e lag\n",
" \"\"\"\n",
" # Rolling means esistenti\n",
" df['temp_rolling_mean_6h'] = df['temp'].rolling(window=6).mean()\n",
" df['cloudcover_rolling_mean_6h'] = df['cloudcover'].rolling(window=6).mean()\n",
"\n",
" # Lag features esistenti\n",
" df['temp_1h_lag'] = df['temp'].shift(1)\n",
" df['cloudcover_1h_lag'] = df['cloudcover'].shift(1)\n",
" df['humidity_1h_lag'] = df['humidity'].shift(1)\n",
"\n",
" return df\n",
"\n",
"def add_condition_indicators(df):\n",
" \"\"\"\n",
" Aggiunge indicatori di condizioni particolari\n",
" \"\"\"\n",
" # Extreme conditions indicator esistente\n",
" df['extreme_conditions'] = ((df['temp'] > df['temp'].quantile(0.75)) &\n",
" (df['humidity'] < df['humidity'].quantile(0.25))).astype(int)\n",
"\n",
" return df\n",
"\n",
"def add_physics_based_conversion_features(df):\n",
" \"\"\"\n",
" Aggiunge feature specifiche per la conversione tra radiazione ed energia\n",
" \"\"\"\n",
" # Conversione da kWh a MJ/m²/h (1 W = 1 J/s = 0.0036 MJ/h)\n",
" df['radiation_to_energy'] = df['solarradiation'] * 0.0036\n",
"\n",
" # Efficienza di conversione reale vs teorica\n",
" df['conversion_efficiency_ratio'] = df['solarenergy'] / df['radiation_to_energy'].clip(lower=1e-6)\n",
"\n",
" # Energia accumulata nel tempo (integrazione)\n",
" df['energy_integral'] = df['radiation_to_energy'].rolling(window=24).sum()\n",
"\n",
" # Differenza tra energia teorica e reale\n",
" df['energy_conversion_gap'] = df['radiation_to_energy'] - df['solarenergy']\n",
"\n",
" # Indice di performance del sistema\n",
" df['system_performance_ratio'] = df['solarenergy'] / df['radiation_to_energy'].clip(lower=1e-6)\n",
"\n",
" return df\n",
"\n",
"def add_advanced_features(df):\n",
" \"\"\"\n",
" Add all advanced features to the DataFrame\n",
" \"\"\"\n",
" # Feature esistenti di base\n",
" # 1. Feature temporali di base\n",
" df = add_time_features(df)\n",
"\n",
" # 2. Feature solari e meteorologiche\n",
" df = add_solar_features(df)\n",
" df = add_solar_specific_features(df)\n",
" df = add_radiation_energy_features(df)\n",
"\n",
" # 3. Feature atmosferiche e di diffusione\n",
" df = add_atmospheric_features(df)\n",
" df = add_diffusion_features(df)\n",
"\n",
" # 4. Feature di persistenza e pattern\n",
" df = add_persistence_features(df)\n",
" df = add_weather_pattern_features(df)\n",
"\n",
" # 5. Feature di efficienza e stagionalità\n",
" df = add_efficiency_features(df)\n",
" df = add_advanced_seasonal_features(df)\n",
"\n",
" # 6. Interazioni e feature derivate\n",
" df = add_basic_interactions(df)\n",
" df = add_rolling_and_lag_features(df)\n",
" df = add_condition_indicators(df)\n",
"\n",
" # 7. Nuove feature di conversione fisica\n",
" df = add_physics_based_conversion_features(df)\n",
"\n",
" # 8. One-hot encoding delle feature categoriche\n",
" df = pd.get_dummies(df, columns=['season', 'time_period'])\n",
"\n",
" return df\n",
"\n",
"\n",
"def prepare_advanced_data(df):\n",
" \"\"\"\n",
" Prepare data for advanced modeling with proper datetime handling\n",
" \"\"\"\n",
" # Assicuriamoci che abbiamo una copia del DataFrame\n",
" df = df.copy()\n",
"\n",
" # Apply feature engineering functions\n",
" df = add_advanced_features(df)\n",
"\n",
" #all_columns = list(df.columns)\n",
" #print(all_columns)\n",
"\n",
" features = {\n",
" # Primary Features (strong direct correlation)\n",
" 'primary_features': [\n",
" 'uvindex',\n",
" 'cloudcover',\n",
" 'visibility',\n",
" 'temp',\n",
" 'pressure',\n",
" 'humidity',\n",
" 'solarradiation'\n",
" ],\n",
"\n",
" # Astronomical and Temporal Features\n",
" 'astronomical_features': [\n",
" 'solar_elevation',\n",
" 'solar_angle',\n",
" 'day_length',\n",
" 'hour_sin',\n",
" 'hour_cos',\n",
" 'day_of_year_sin',\n",
" 'day_of_year_cos',\n",
" 'month_sin',\n",
" 'month_cos',\n",
" 'solar_noon',\n",
" 'daylight_correction'\n",
" ],\n",
"\n",
" # Key Indices and Interactions\n",
" 'key_interactions': [\n",
" 'clear_sky_index',\n",
" 'atmospheric_attenuation',\n",
" 'theoretical_radiation',\n",
" 'expected_radiation',\n",
" 'cloud_elevation',\n",
" 'visibility_elevation',\n",
" 'uv_cloud_interaction',\n",
" 'temp_radiation_potential',\n",
" 'air_mass_index',\n",
" 'atmospheric_stability',\n",
" 'vapor_pressure_deficit',\n",
" 'diffusion_index',\n",
" 'atmospheric_transmittance',\n",
" 'temp_humidity_interaction',\n",
" 'clear_sky_factor'\n",
" ],\n",
"\n",
" # Rolling Features (temporal trends)\n",
" 'rolling_features': [\n",
" 'cloud_rolling_12h',\n",
" 'temp_rolling_12h',\n",
" 'uv_rolling_12h',\n",
" 'cloudcover_rolling_mean_6h',\n",
" 'temp_rolling_mean_6h',\n",
" 'energy_rolling_mean_6h',\n",
" 'uv_rolling_mean_6h',\n",
" 'energy_volatility',\n",
" 'uv_volatility'\n",
" ],\n",
"\n",
" # Lag Features\n",
" 'lag_features': [\n",
" 'temp_1h_lag',\n",
" 'cloudcover_1h_lag',\n",
" 'humidity_1h_lag',\n",
" 'energy_lag_1h',\n",
" 'uv_lag_1h'\n",
" ],\n",
"\n",
" # Efficiency and Performance Features\n",
" 'efficiency_features': [\n",
" 'temp_losses',\n",
" 'soiling_loss_factor',\n",
" 'estimated_efficiency',\n",
" 'production_potential',\n",
" 'system_performance_ratio',\n",
" 'conversion_efficiency_ratio'\n",
" ],\n",
"\n",
" # Weather Pattern Features\n",
" 'weather_pattern_features': [\n",
" 'clear_sky_duration',\n",
" 'weather_variability_index',\n",
" 'temp_stability',\n",
" 'humidity_stability',\n",
" 'cloudcover_stability'\n",
" ],\n",
"\n",
" # Categorical Features\n",
" 'categorical_features': [\n",
" 'season_Spring',\n",
" 'season_Summer',\n",
" 'season_Autumn',\n",
" 'season_Winter',\n",
" 'time_period_Morning',\n",
" 'time_period_Afternoon',\n",
" 'time_period_Evening',\n",
" 'time_period_Night'\n",
" ]\n",
" }\n",
"\n",
" final_features = [feature for group in features.values() for feature in group]\n",
"\n",
" if not isinstance(df.index, pd.DatetimeIndex):\n",
" if 'datetime' in df.columns:\n",
" df['datetime'] = pd.to_datetime(df['datetime'])\n",
" df.set_index('datetime', inplace=True)\n",
" else:\n",
" raise ValueError(\"No datetime column or index found in DataFrame\")\n",
"\n",
" # Ordiniamo il DataFrame per datetime\n",
" df = df.sort_index()\n",
"\n",
" # Handle missing values\n",
" target_variables = ['solarradiation', 'solarenergy', 'uvindex']\n",
" for column in final_features + target_variables:\n",
" if column in df.columns:\n",
" if isinstance(df.index, pd.DatetimeIndex):\n",
" df[column] = df[column].interpolate(method='time')\n",
" else:\n",
" df[column] = df[column].interpolate(method='linear')\n",
"\n",
" df.fillna(0, inplace=True)\n",
"\n",
" # Temporal split\n",
" data_after_2010 = df[df['year'] >= 2010].copy()\n",
" data_before_2010 = df[df['year'] < 2010].copy()\n",
"\n",
" X = data_after_2010[final_features]\n",
" y = data_after_2010['solarenergy']\n",
" X_to_predict = data_before_2010[final_features]\n",
"\n",
" # Train-test split\n",
" X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y, test_size=0.13, random_state=random_state_value, shuffle=False\n",
" )\n",
"\n",
" # Scaling\n",
" scaler_X = RobustScaler()\n",
" X_train_scaled = scaler_X.fit_transform(X_train)\n",
" X_test_scaled = scaler_X.transform(X_test)\n",
" X_to_predict_scaled = scaler_X.transform(X_to_predict)\n",
"\n",
" scaler_y = RobustScaler()\n",
" y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1))\n",
" y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1))\n",
"\n",
" # Print info about selected features\n",
" print(\"\\nSelected features:\")\n",
" print(f\"Number of features: {len(final_features)}\")\n",
" print(\"Features list:\", final_features)\n",
"\n",
" return X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled, scaler_X, scaler_y, final_features, X_to_predict_scaled\n",
"\n",
"\n",
"def create_sequence_data(X, sequence_length=24):\n",
" \"\"\"\n",
" Converts data into sequences for LSTM input\n",
" sequence_length represents how many previous hours to consider\n",
" \"\"\"\n",
" sequences = []\n",
" for i in range(len(X) - sequence_length + 1):\n",
" sequences.append(X[i:i + sequence_length])\n",
" return np.array(sequences)\n",
"\n",
"\n",
"def prepare_hybrid_data(df):\n",
" X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled, scaler_X, scaler_y, features, X_to_predict_scaled = prepare_advanced_data(df)\n",
"\n",
" # Convert data into sequences\n",
" sequence_length = 24 # 24 hours of historical data\n",
"\n",
" X_train_seq = create_sequence_data(X_train_scaled, sequence_length)\n",
" X_test_seq = create_sequence_data(X_test_scaled, sequence_length)\n",
"\n",
" # Adjust y by removing the first (sequence_length-1) elements\n",
" y_train = y_train_scaled[sequence_length - 1:]\n",
" y_test = y_test_scaled[sequence_length - 1:]\n",
"\n",
" X_to_predict_seq = create_sequence_data(X_to_predict_scaled, sequence_length)\n",
"\n",
" return X_train_seq, X_test_seq, y_train, y_test, scaler_X, scaler_y, features, X_to_predict_seq"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "570b18f2caa3e0db",
"metadata": {},
"outputs": [],
"source": [
"def create_solarenergy_model(input_shape, folder_name, l2_lambda=0.005, min_output=0, max_output=4.0):\n",
" from tensorflow import keras\n",
" from keras.models import Model\n",
" from keras.layers import (\n",
" Input, Dense, Conv1D, BatchNormalization, Dropout, \n",
" MultiHeadAttention, LayerNormalization, Lambda,\n",
" Concatenate, Activation, Bidirectional, LSTM, Add\n",
" )\n",
" from keras.regularizers import l2\n",
" from keras.optimizers import AdamW\n",
" import tensorflow as tf\n",
" import numpy as np\n",
" import tensorflow_addons as tfa\n",
" from tensorflow.keras.optimizers.schedules import CosineDecayRestarts\n",
" \n",
" # Input layer\n",
" inputs = Input(shape=input_shape)\n",
" \n",
" # Feature groups definition\n",
" feature_dims = {\n",
" 'solar': [6, 7, 8, 9, 16, 18, 19, 20, 21],\n",
" 'weather': [0, 1, 2, 3, 4, 5],\n",
" 'temporal': [10, 11, 12, 13, 14, 15],\n",
" 'derived': [22, 23, 24, 25, 26, 27, 28, 29, 30, 31],\n",
" 'rolling': [33, 34, 35, 36, 37, 38, 39],\n",
" 'lag': [40, 41, 42, 43, 44],\n",
" 'performance': [45, 46, 47, 48, 49, 50]\n",
" }\n",
" \n",
" # Feature extraction\n",
" feature_tensors = {}\n",
" for name, indices in feature_dims.items():\n",
" valid_indices = [i for i in indices if i < input_shape[-1]]\n",
" if valid_indices:\n",
" feature_tensors[name] = Lambda(\n",
" lambda x, idx=valid_indices: tf.gather(x, idx, axis=-1)\n",
" )(inputs)\n",
" \n",
" # Feature processing with residual connections\n",
" def process_feature_group(tensor, units, name):\n",
" x = Conv1D(units, kernel_size=3, padding='same', activation='swish',\n",
" kernel_regularizer=l2(l2_lambda))(tensor)\n",
" x = BatchNormalization()(x)\n",
" x = Dropout(0.2)(x)\n",
" \n",
" residual = Conv1D(units, kernel_size=1, padding='same')(tensor)\n",
" x = Add()([x, residual])\n",
" x = LayerNormalization()(x)\n",
" \n",
" return x\n",
" \n",
" # Process each feature group\n",
" processed_features = {}\n",
" for name, tensor in feature_tensors.items():\n",
" units = 64 if name == 'solar' else 32 if name == 'weather' else 16\n",
" processed_features[name] = process_feature_group(tensor, units, name)\n",
" \n",
" # Enhanced attention mechanism\n",
" def attention_block(x, num_heads=4):\n",
" attention_output = MultiHeadAttention(\n",
" num_heads=num_heads, \n",
" key_dim=x.shape[-1] // num_heads\n",
" )(x, x)\n",
" x = LayerNormalization()(x + attention_output)\n",
" \n",
" ffn = Dense(x.shape[-1] * 2, activation='swish')(x)\n",
" ffn = Dropout(0.1)(ffn)\n",
" ffn = Dense(x.shape[-1])(ffn)\n",
" \n",
" return LayerNormalization()(x + ffn)\n",
" \n",
" # Merge primary features with attention\n",
" primary_features = [\n",
" processed_features['solar'],\n",
" processed_features['weather'],\n",
" processed_features['performance']\n",
" ]\n",
" primary_context = Concatenate(axis=-1)(primary_features)\n",
" primary_context = attention_block(primary_context)\n",
" \n",
" # Merge secondary features\n",
" secondary_features = [\n",
" processed_features[name] for name in ['temporal', 'rolling', 'lag']\n",
" if name in processed_features\n",
" ]\n",
" if secondary_features:\n",
" secondary_context = Concatenate(axis=-1)(secondary_features)\n",
" secondary_context = attention_block(secondary_context)\n",
" else:\n",
" secondary_context = primary_context\n",
" \n",
" # Final feature merge\n",
" combined = Concatenate(axis=-1)([\n",
" primary_context, \n",
" secondary_context,\n",
" processed_features['derived']\n",
" ])\n",
" \n",
" # Sequential processing with residual LSTM\n",
" def residual_lstm_block(x, units):\n",
" lstm_out = Bidirectional(LSTM(units, return_sequences=True))(x)\n",
" residual = Conv1D(units * 2, kernel_size=1, padding='same')(x)\n",
" x = Add()([lstm_out, residual])\n",
" x = LayerNormalization()(x)\n",
" return x\n",
" \n",
" x = residual_lstm_block(combined, 128)\n",
" x = residual_lstm_block(x, 64)\n",
" x = Bidirectional(LSTM(64))(x)\n",
" x = Dropout(0.2)(x)\n",
" \n",
" # Classification branch\n",
" class_x = Dense(128, activation='swish', kernel_regularizer=l2(l2_lambda))(x)\n",
" class_x = BatchNormalization()(class_x)\n",
" class_x = Dropout(0.2)(class_x)\n",
" class_x = Dense(64, activation='swish', kernel_regularizer=l2(l2_lambda))(class_x)\n",
" class_output = Dense(1, activation='sigmoid', name='classification_output')(class_x)\n",
" \n",
" # Enhanced regression branch with multiple pathways\n",
" def create_regression_pathway(x, name):\n",
" x = Dense(128, activation='swish', kernel_regularizer=l2(l2_lambda))(x)\n",
" x = BatchNormalization()(x)\n",
" x = Dropout(0.2)(x)\n",
" \n",
" residual = x\n",
" x = Dense(128, activation='swish', kernel_regularizer=l2(l2_lambda))(x)\n",
" x = BatchNormalization()(x)\n",
" x = Dense(128, activation='swish', kernel_regularizer=l2(l2_lambda))(x)\n",
" x = Add()([x, residual])\n",
" \n",
" x = Dense(64, activation='swish', kernel_regularizer=l2(l2_lambda))(x)\n",
" return Dense(1, name=f'{name}_output')(x)\n",
" \n",
" # Create specialized regression pathways\n",
" low_range = create_regression_pathway(x, 'low_range')\n",
" mid_range = create_regression_pathway(x, 'mid_range')\n",
" high_range = create_regression_pathway(x, 'high_range')\n",
" \n",
" # Create context vector for attention\n",
" context = Dense(64, activation='swish')(x)\n",
" \n",
" # Calculate attention scores\n",
" attention_scores = Dense(3, activation='softmax')(context)\n",
" \n",
" # Combine predictions using attention weights\n",
" reg_output = Lambda(\n",
" lambda x: x[0][:, 0:1] * x[1] + x[0][:, 1:2] * x[2] + x[0][:, 2:3] * x[3],\n",
" name='regression_output'\n",
" )([attention_scores, low_range, mid_range, high_range])\n",
"\n",
" # Final output processing remains the same...\n",
" final_x = Dense(256, activation='swish', kernel_regularizer=l2(l2_lambda))(x)\n",
" final_x = BatchNormalization()(final_x)\n",
" final_x = Dropout(0.2)(final_x)\n",
" \n",
" residual = final_x\n",
" final_x = Dense(256, activation='swish', kernel_regularizer=l2(l2_lambda))(final_x)\n",
" final_x = BatchNormalization()(final_x)\n",
" final_x = Dense(256, activation='swish', kernel_regularizer=l2(l2_lambda))(final_x)\n",
" final_x = Add()([final_x, residual])\n",
" \n",
" final_x = Dense(128, activation='swish', kernel_regularizer=l2(l2_lambda))(final_x)\n",
" final_x = Dense(1)(final_x)\n",
" final_output = Lambda(\n",
" lambda x: tf.clip_by_value(x, min_output, max_output),\n",
" name='final_output'\n",
" )(final_x)\n",
" \n",
" # Build model with all outputs\n",
" model = Model(\n",
" inputs=inputs,\n",
" outputs=[class_output, reg_output, final_output]\n",
" )\n",
" \n",
" # Enhanced loss functions\n",
" def enhanced_regression_loss(y_true, y_pred):\n",
" mae = tf.abs(y_true - y_pred)\n",
" mse = tf.square(y_true - y_pred)\n",
" \n",
" value_ranges = tf.cast(y_true > 2.0, tf.float32) * 1.5 + \\\n",
" tf.cast(tf.logical_and(y_true <= 2.0, y_true > 1.0), tf.float32) * 1.2 + \\\n",
" tf.cast(y_true <= 1.0, tf.float32)\n",
" \n",
" weighted_loss = (0.5 * mae + 0.5 * mse) * value_ranges\n",
" return tf.reduce_mean(weighted_loss)\n",
" \n",
" def final_loss(y_true, y_pred):\n",
" y_true = tf.clip_by_value(y_true, min_output, max_output)\n",
" mae = tf.reduce_mean(tf.abs(y_true - y_pred))\n",
" mse = tf.reduce_mean(tf.square(y_true - y_pred))\n",
" return 0.5 * mae + 0.5 * mse\n",
" \n",
" # Learning rate schedule\n",
" clr = CosineDecayRestarts(\n",
" initial_learning_rate=2e-4,\n",
" first_decay_steps=1000,\n",
" t_mul=2.0,\n",
" m_mul=0.9,\n",
" alpha=1e-7\n",
" )\n",
" \n",
" # Optimizer\n",
" optimizer = AdamW(\n",
" learning_rate=clr,\n",
" weight_decay=0.01,\n",
" clipnorm=1.0\n",
" )\n",
" \n",
" # Compile model\n",
" model.compile(\n",
" optimizer=optimizer,\n",
" loss={\n",
" 'classification_output': 'binary_crossentropy',\n",
" 'regression_output': enhanced_regression_loss,\n",
" 'final_output': final_loss\n",
" },\n",
" loss_weights={\n",
" 'classification_output': 0.2,\n",
" 'regression_output': 0.4,\n",
" 'final_output': 0.4\n",
" }\n",
" )\n",
"\n",
" # Plot model architecture\n",
" try:\n",
" plot_model(\n",
" model,\n",
" to_file=f'{folder_name}_model_architecture.png',\n",
" show_shapes=True,\n",
" show_layer_names=True,\n",
" dpi=150,\n",
" show_layer_activations=True\n",
" )\n",
" except Exception as e:\n",
" print(f\"Warning: Could not plot model architecture: {e}\")\n",
"\n",
" return model\n",
"\n",
"\n",
"def evaluate_solarenergy_predictions(y_true, y_pred, hour=None, folder_name=None):\n",
" \"\"\"\n",
" Comprehensive evaluation of solar energy predictions with detailed analysis and visualizations.\n",
"\n",
" Parameters:\n",
" -----------\n",
" y_true : array-like\n",
" Actual solar energy values (kWh)\n",
" y_pred : array-like\n",
" Predicted solar energy values (kWh)\n",
" hour : array-like, optional\n",
" Array of hours corresponding to predictions, for temporal analysis\n",
" folder_name : str, optional\n",
" Directory to save analysis plots\n",
"\n",
" Returns:\n",
" --------\n",
" dict\n",
" Dictionary containing all calculated metrics\n",
" \"\"\"\n",
"\n",
" # Data preparation\n",
" y_true = np.array(y_true).ravel()\n",
" y_pred = np.array(y_pred).ravel()\n",
" errors = y_pred - y_true\n",
"\n",
" # Basic metrics calculation\n",
" mae_raw = mean_absolute_error(y_true, y_pred)\n",
" rmse_raw = np.sqrt(mean_squared_error(y_true, y_pred))\n",
" r2_raw = r2_score(y_true, y_pred)\n",
"\n",
" # Corrected MAPE calculation\n",
" mask = y_true > 10 # Consider only values above 10 kWh\n",
" if np.any(mask):\n",
" mape = np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100\n",
" else:\n",
" mape = np.nan\n",
"\n",
" # Corrected error margin accuracy\n",
" within_5_percent = np.mean(np.abs(errors) <= 5) * 100 # Within 5 kWh\n",
" within_10_percent = np.mean(np.abs(errors) <= 10) * 100 # Within 10 kWh\n",
" within_20_percent = np.mean(np.abs(errors) <= 20) * 100 # Within 20 kWh\n",
"\n",
" # Energy level classification\n",
" def get_energy_level(value):\n",
" if value <= 0.5:\n",
" return 'Very Low'\n",
" elif value <= 2.0:\n",
" return 'Low'\n",
" elif value <= 4.0:\n",
" return 'Moderate'\n",
" elif value <= 6.0:\n",
" return 'High'\n",
" elif value <= 8.0:\n",
" return 'Very High'\n",
" else:\n",
" return 'Extreme'\n",
"\n",
" # Calculate energy levels\n",
" y_true_levels = [get_energy_level(v) for v in y_true]\n",
" y_pred_levels = [get_energy_level(v) for v in y_pred]\n",
" level_accuracy = np.mean([t == p for t, p in zip(y_true_levels, y_pred_levels)])\n",
"\n",
" unique_levels = sorted(list(set(y_true_levels + y_pred_levels)))\n",
"\n",
" # Print main metrics\n",
" print(\"\\nSolar Energy Prediction Metrics:\")\n",
" print(\"\\nAbsolute Metrics:\")\n",
" print(f\"MAE: {mae_raw:.2f} kWh\")\n",
" print(f\"RMSE: {rmse_raw:.2f} kWh\")\n",
" print(f\"R² Score: {r2_raw:.3f}\")\n",
" print(f\"MAPE: {mape:.2f}%\" if not np.isnan(mape) else \"MAPE: N/A (insufficient data)\")\n",
"\n",
" print(\"\\nAccuracy Metrics:\")\n",
" print(f\"Within ±5 kWh: {within_5_percent:.1f}%\")\n",
" print(f\"Within ±10 kWh: {within_10_percent:.1f}%\")\n",
" print(f\"Within ±20 kWh: {within_20_percent:.1f}%\")\n",
"\n",
" print(\"\\nLevel Accuracy:\")\n",
" print(f\"Level Accuracy: {level_accuracy * 100:.1f}%\")\n",
"\n",
" # Confusion matrix for energy levels\n",
" cm = confusion_matrix(y_true_levels, y_pred_levels, labels=unique_levels)\n",
" print(\"\\nConfusion Matrix for Energy Levels:\")\n",
" cm_df = pd.DataFrame(\n",
" cm,\n",
" columns=unique_levels,\n",
" index=unique_levels\n",
" )\n",
" print(cm_df)\n",
"\n",
" # Time period analysis\n",
" if hour is not None:\n",
" day_periods = {\n",
" 'Morning (5-11)': (5, 11),\n",
" 'Noon (11-13)': (11, 13),\n",
" 'Afternoon (13-17)': (13, 17),\n",
" 'Evening (17-21)': (17, 21),\n",
" 'Night (21-5)': (21, 5)\n",
" }\n",
"\n",
" print(\"\\nAnalysis by Time Period:\")\n",
" for period, (start, end) in day_periods.items():\n",
" if start < end:\n",
" mask = (hour >= start) & (hour < end)\n",
" else:\n",
" mask = (hour >= start) | (hour < end)\n",
"\n",
" if np.any(mask):\n",
" period_mae = mean_absolute_error(y_true[mask], y_pred[mask])\n",
"\n",
" # Corrected period MAPE calculation\n",
" period_mask = mask & (y_true > 10)\n",
" if np.any(period_mask):\n",
" period_mape = np.mean(np.abs((y_true[period_mask] - y_pred[period_mask]) / y_true[period_mask])) * 100\n",
" print(f\"\\n{period}:\")\n",
" print(f\"MAE: {period_mae:.2f} kWh\")\n",
" print(f\"MAPE: {period_mape:.2f}%\")\n",
" else:\n",
" print(f\"\\n{period}:\")\n",
" print(f\"MAE: {period_mae:.2f} kWh\")\n",
" print(\"MAPE: N/A (insufficient data)\")\n",
"\n",
" # Visualizations\n",
" if folder_name is not None:\n",
" try:\n",
" # Figure 1: Main analysis plots\n",
" plt.figure(figsize=(20, 15))\n",
"\n",
" # Plot 1: Scatter plot of actual vs predicted values\n",
" plt.subplot(3, 2, 1)\n",
" plt.scatter(y_true, y_pred, alpha=0.5)\n",
" plt.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'r--', lw=2)\n",
" plt.xlabel('Actual Energy (kWh)')\n",
" plt.ylabel('Predicted Energy (kWh)')\n",
" plt.title('Actual vs Predicted Values')\n",
" plt.grid(True)\n",
"\n",
" # Plot 2: Absolute error distribution\n",
" plt.subplot(3, 2, 2)\n",
" plt.hist(errors, bins=50, alpha=0.7)\n",
" plt.xlabel('Prediction Error (kWh)')\n",
" plt.ylabel('Frequency')\n",
" plt.title('Error Distribution')\n",
" plt.grid(True)\n",
"\n",
" # Plot 3: Percentage error distribution (only for values > 0.5 kWh)\n",
" plt.subplot(3, 2, 3)\n",
" mask = y_true > 0.5\n",
" if np.any(mask):\n",
" percentage_errors = ((y_pred[mask] - y_true[mask]) / y_true[mask]) * 100\n",
" plt.hist(np.clip(percentage_errors, -100, 100), bins=50, alpha=0.7)\n",
" plt.xlabel('Percentage Error (%)')\n",
" plt.ylabel('Frequency')\n",
" plt.title('Percentage Error Distribution (for values > 0.5 kWh)')\n",
" plt.grid(True)\n",
"\n",
" # Plot 4: Errors vs actual values\n",
" plt.subplot(3, 2, 4)\n",
" plt.scatter(y_true, errors, alpha=0.5)\n",
" plt.axhline(y=0, color='r', linestyle='--')\n",
" plt.xlabel('Actual Energy (kWh)')\n",
" plt.ylabel('Error (kWh)')\n",
" plt.title('Errors vs Actual Values')\n",
" plt.grid(True)\n",
"\n",
" # Plot 5: Error boxplot by Energy level\n",
" plt.subplot(3, 2, 5)\n",
" sns.boxplot(x=[get_energy_level(v) for v in y_true], y=errors)\n",
" plt.xticks(rotation=45)\n",
" plt.xlabel('Energy Level')\n",
" plt.ylabel('Error (kWh)')\n",
" plt.title('Error Distribution by Level')\n",
"\n",
" # Plot 6: Confusion matrix heatmap\n",
" plt.subplot(3, 2, 6)\n",
" sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')\n",
" plt.title('Confusion Matrix')\n",
" plt.xticks(rotation=45)\n",
" plt.yticks(rotation=45)\n",
"\n",
" plt.tight_layout()\n",
" filename = f'{folder_name}_energy_analysis.png'\n",
" plt.savefig(filename, dpi=300, bbox_inches='tight')\n",
" print(f\"\\nPlot saved as: {filename}\")\n",
" plt.close()\n",
"\n",
" except Exception as e:\n",
" print(f\"\\nError saving plots: {str(e)}\")\n",
"\n",
" # Additional error statistics\n",
" print(\"\\nError Statistics:\")\n",
" print(f\"Mean error: {np.mean(errors):.3f}\")\n",
" print(f\"Error standard deviation: {np.std(errors):.3f}\")\n",
" print(f\"Median error: {np.median(errors):.3f}\")\n",
" print(f\"95th percentile absolute error: {np.percentile(np.abs(errors), 95):.3f}\")\n",
"\n",
" # Return structured metrics\n",
" metrics = {\n",
" 'absolute': {\n",
" 'mae': mae_raw,\n",
" 'rmse': rmse_raw,\n",
" 'r2': r2_raw,\n",
" 'mape': float(mape) if not np.isnan(mape) else None\n",
" },\n",
" 'accuracy': {\n",
" 'within_5_wm2': float(within_5_percent),\n",
" 'within_10_wm2': float(within_10_percent),\n",
" 'within_20_wm2': float(within_20_percent)\n",
" },\n",
" 'categorical': {\n",
" 'level_accuracy': float(level_accuracy)\n",
" },\n",
" 'error_stats': {\n",
" 'mean': float(np.mean(errors)),\n",
" 'std': float(np.std(errors)),\n",
" 'median': float(np.median(errors)),\n",
" 'p95_abs': float(np.percentile(np.abs(errors), 95))\n",
" }\n",
" }\n",
"\n",
" return metrics\n",
"\n",
"\n",
"def plot_training_history(history, folder_name=None):\n",
"    \"\"\"\n",
"    Visualize and optionally save the training history of the hybrid model.\n",
"\n",
"    Only the series actually present in `history.history` are drawn: a model\n",
"    compiled without extra metrics records just the losses, and indexing a\n",
"    missing key would raise KeyError before anything is plotted or saved.\n",
"\n",
"    Parameters:\n",
"    -----------\n",
"    history : object\n",
"        Object with a `history` attribute mapping series names to per-epoch\n",
"        value lists (as returned by `model.fit`)\n",
"    folder_name : str, optional\n",
"        Output prefix. When given, the figure is written to\n",
"        '<folder_name>_training_history.png' and the recorded values to\n",
"        '<folder_name>_training_history.json'\n",
"    \"\"\"\n",
"    logs = history.history\n",
"\n",
"    # (subplot position, title, y label, [(history key, legend label), ...])\n",
"    panels = [\n",
"        (1, 'Model Losses', 'Loss', [\n",
"            ('classification_output_loss', 'Class Loss'),\n",
"            ('regression_output_loss', 'Reg Loss'),\n",
"            ('final_output_loss', 'Final Loss'),\n",
"            ('val_classification_output_loss', 'Val Class Loss'),\n",
"            ('val_regression_output_loss', 'Val Reg Loss'),\n",
"            ('val_final_output_loss', 'Val Final Loss'),\n",
"        ]),\n",
"        (2, 'Classification Metrics', 'Metric Value', [\n",
"            ('classification_output_accuracy', 'Class Acc'),\n",
"            ('val_classification_output_accuracy', 'Val Class Acc'),\n",
"            ('classification_output_auc', 'Class AUC'),\n",
"            ('val_classification_output_auc', 'Val Class AUC'),\n",
"        ]),\n",
"        (3, 'Regression MAE', 'MAE', [\n",
"            ('regression_output_mae', 'Reg MAE'),\n",
"            ('val_regression_output_mae', 'Val Reg MAE'),\n",
"        ]),\n",
"        (4, 'Final Output MAE', 'MAE', [\n",
"            ('final_output_mae', 'Final MAE'),\n",
"            ('val_final_output_mae', 'Val Final MAE'),\n",
"        ]),\n",
"    ]\n",
"\n",
"    plt.figure(figsize=(15, 10))\n",
"\n",
"    for position, title, ylabel, series in panels:\n",
"        plt.subplot(2, 2, position)\n",
"        recorded = [(key, label) for key, label in series if key in logs]\n",
"        for key, label in recorded:\n",
"            plt.plot(logs[key], label=label)\n",
"        plt.title(title)\n",
"        plt.xlabel('Epoch')\n",
"        plt.ylabel(ylabel)\n",
"        if recorded:\n",
"            # Skip the legend on an empty panel to avoid a matplotlib warning\n",
"            plt.legend()\n",
"        plt.grid(True)\n",
"\n",
"    plt.tight_layout()\n",
"\n",
"    if folder_name is not None:\n",
"        filename = f'{folder_name}_training_history.png'\n",
"        plt.savefig(filename, dpi=300, bbox_inches='tight')\n",
"        print(f\"\\nTraining history plot saved as: {filename}\")\n",
"\n",
"        # Save history to JSON; values may be NumPy scalars (e.g. float32),\n",
"        # which json cannot serialize, so cast them to built-in floats\n",
"        history_dict = {key: [float(v) for v in values] for key, values in logs.items()}\n",
"        json_filename = f'{folder_name}_training_history.json'\n",
"        with open(json_filename, 'w') as f:\n",
"            json.dump(history_dict, f)\n",
"        print(f\"Training history saved as: {json_filename}\")\n",
"\n",
"    plt.show()\n",
"\n",
"def calculate_metrics(y_true, y_class, y_reg, y_final, min_output, max_output):\n",
"    \"\"\"\n",
"    Calculates and prints comprehensive metrics for the solar energy prediction model.\n",
"\n",
"    Three sections are evaluated independently: classification (zero vs non-zero),\n",
"    regression on the non-zero targets, and the final combined output. An error in\n",
"    one section is printed and leaves that section empty in the returned dict\n",
"    without aborting the other sections.\n",
"\n",
"    Parameters:\n",
"    -----------\n",
"    y_true : array-like\n",
"        Ground truth values\n",
"    y_class : array-like\n",
"        Classification predictions (probability of non-zero values)\n",
"    y_reg : array-like\n",
"        Regression predictions (unrestricted values)\n",
"    y_final : array-like\n",
"        Final clipped predictions\n",
"    min_output : float\n",
"        Minimum allowed output value\n",
"    max_output : float\n",
"        Maximum allowed output value\n",
"\n",
"    Returns:\n",
"    --------\n",
"    dict\n",
"        {'classification': {...}, 'regression': {...}, 'final': {...}}; a section\n",
"        that was skipped or failed is an empty dict\n",
"\n",
"    Raises:\n",
"    -------\n",
"    AssertionError\n",
"        If the four input arrays do not have the same length\n",
"    \"\"\"\n",
"    from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix\n",
"\n",
"    # Ensure proper array formatting and dimensionality\n",
"    y_true = np.array(y_true).flatten()\n",
"    y_class = np.array(y_class).flatten()\n",
"    y_reg = np.array(y_reg).flatten()\n",
"    y_final = np.array(y_final).flatten()\n",
"\n",
"    # Validate input dimensions\n",
"    assert len(y_true) == len(y_class) == len(y_reg) == len(y_final), \\\n",
"        \"All input arrays must have the same length\"\n",
"\n",
"    metrics = {'classification': {}, 'regression': {}, 'final': {}}\n",
"\n",
"    # Relative errors are divided by (y_true + epsilon) so zero targets do not\n",
"    # divide by zero; the ratio is then clipped to [0, 1], i.e. capped at 100%\n",
"    epsilon = 1e-7\n",
"\n",
"    # Classification metrics with error handling\n",
"    print(\"\\nClassification Metrics:\")\n",
"    try:\n",
"        y_true_binary = (y_true > 0).astype(int)\n",
"        y_pred_binary = (y_class > 0.5).astype(int)\n",
"\n",
"        accuracy = np.mean(y_pred_binary == y_true_binary) * 100\n",
"        auc_roc = roc_auc_score(y_true_binary, y_class)\n",
"        print(f\"Accuracy: {accuracy:.2f}%\")\n",
"        print(f\"AUC-ROC: {auc_roc:.4f}\")\n",
"\n",
"        print(\"\\nConfusion Matrix:\")\n",
"        conf_matrix = confusion_matrix(y_true_binary, y_pred_binary)\n",
"        print(conf_matrix)\n",
"\n",
"        print(\"\\nClassification Report:\")\n",
"        class_report = classification_report(\n",
"            y_true_binary,\n",
"            y_pred_binary,\n",
"            target_names=['Zero', 'Non-Zero'],\n",
"            digits=4\n",
"        )\n",
"        print(class_report)\n",
"\n",
"        metrics['classification'] = {\n",
"            'accuracy': float(accuracy),\n",
"            'auc_roc': float(auc_roc),\n",
"            'confusion_matrix': conf_matrix.tolist()\n",
"        }\n",
"    except Exception as e:\n",
"        print(f\"Error in classification metrics calculation: {str(e)}\")\n",
"\n",
"    # Regression metrics with error handling\n",
"    print(\"\\nRegression Metrics (non-zero values):\")\n",
"    mask_nonzero = y_true > 0\n",
"    if np.any(mask_nonzero):\n",
"        try:\n",
"            y_true_nonzero = y_true[mask_nonzero]\n",
"            y_reg_nonzero = y_reg[mask_nonzero]\n",
"\n",
"            # Range validation\n",
"            out_of_range = np.sum(\n",
"                (y_reg_nonzero < min_output) |\n",
"                (y_reg_nonzero > max_output)\n",
"            )\n",
"\n",
"            # Error metrics with numerical stability\n",
"            diff = np.abs((y_true_nonzero - y_reg_nonzero) /\n",
"                          (y_true_nonzero + epsilon))\n",
"            diff = np.clip(diff, 0, 1)\n",
"\n",
"            # Calculate metrics\n",
"            mape = np.mean(diff) * 100\n",
"            within_10_percent = np.mean(diff <= 0.10) * 100\n",
"            mae = np.mean(np.abs(y_true_nonzero - y_reg_nonzero))\n",
"            rmse = np.sqrt(np.mean(np.square(y_true_nonzero - y_reg_nonzero)))\n",
"\n",
"            print(f\"Out of range: {out_of_range} predictions\")\n",
"            print(f\"MAPE: {mape:.2f}%\")\n",
"            print(f\"Within ±10%: {within_10_percent:.2f}%\")\n",
"            print(f\"MAE: {mae:.2f}\")\n",
"            print(f\"RMSE: {rmse:.2f}\")\n",
"\n",
"            metrics['regression'] = {\n",
"                'out_of_range': int(out_of_range),\n",
"                'mape': float(mape),\n",
"                'within_10_percent': float(within_10_percent),\n",
"                'mae': float(mae),\n",
"                'rmse': float(rmse)\n",
"            }\n",
"        except Exception as e:\n",
"            print(f\"Error in regression metrics calculation: {str(e)}\")\n",
"    else:\n",
"        print(\"No non-zero values in this batch\")\n",
"\n",
"    # Final output metrics with error handling\n",
"    print(\"\\nFinal Combined Output Metrics:\")\n",
"    try:\n",
"        # Count outputs that fall outside the allowed bounds\n",
"        out_of_range = np.sum((y_final < min_output) | (y_final > max_output))\n",
"\n",
"        # Calculate metrics with numerical stability\n",
"        diff = np.abs((y_true - y_final) / (y_true + epsilon))\n",
"        diff = np.clip(diff, 0, 1)\n",
"\n",
"        mape = np.mean(diff) * 100\n",
"        within_2_percent = np.mean(diff <= 0.02) * 100\n",
"        within_5_percent = np.mean(diff <= 0.05) * 100\n",
"        within_10_percent = np.mean(diff <= 0.10) * 100\n",
"        within_20_percent = np.mean(diff <= 0.20) * 100\n",
"        mae = np.mean(np.abs(y_true - y_final))\n",
"        rmse = np.sqrt(np.mean(np.square(y_true - y_final)))\n",
"\n",
"        print(f\"Out of range: {out_of_range} predictions\")\n",
"        print(f\"MAPE: {mape:.2f}%\")\n",
"        print(f\"Within ±2%: {within_2_percent:.2f}%\")\n",
"        print(f\"Within ±5%: {within_5_percent:.2f}%\")\n",
"        print(f\"Within ±10%: {within_10_percent:.2f}%\")\n",
"        print(f\"Within ±20%: {within_20_percent:.2f}%\")\n",
"        print(f\"MAE: {mae:.2f}\")\n",
"        print(f\"RMSE: {rmse:.2f}\")\n",
"\n",
"        metrics['final'] = {\n",
"            'out_of_range': int(out_of_range),\n",
"            'mape': float(mape),\n",
"            'within_2_percent': float(within_2_percent),\n",
"            'within_5_percent': float(within_5_percent),\n",
"            'within_10_percent': float(within_10_percent),\n",
"            'within_20_percent': float(within_20_percent),\n",
"            'mae': float(mae),\n",
"            'rmse': float(rmse)\n",
"        }\n",
"    except Exception as e:\n",
"        print(f\"Error in final output metrics calculation: {str(e)}\")\n",
"\n",
"    return metrics\n",
"\n",
"def train_hybrid_model(model, X_train, y_train, X_test, y_test, epochs=100, batch_size=32, folder_name='solarenergy', min_output=0, max_output=1, eval_every=20, patience=35):\n",
"    \"\"\"\n",
"    Train the hybrid solar energy model and print detailed evaluation metrics.\n",
"\n",
"    Parameters:\n",
"    -----------\n",
"    model : Keras model\n",
"        Compiled model whose outputs are named 'classification_output',\n",
"        'regression_output' and 'final_output'\n",
"    X_train, y_train : array-like\n",
"        Training inputs and continuous targets\n",
"    X_test, y_test : array-like\n",
"        Hold-out inputs and targets; also passed to fit() as validation data\n",
"    epochs : int\n",
"        Maximum number of training epochs\n",
"    batch_size : int\n",
"        Mini-batch size\n",
"    folder_name : str\n",
"        Prefix of the checkpoint file and of the TensorBoard log directory\n",
"    min_output, max_output : float\n",
"        Bounds forwarded to calculate_metrics for its out-of-range counts\n",
"    eval_every : int\n",
"        Print detailed metrics every `eval_every` epochs, starting with the\n",
"        first one; 0 or None disables the periodic report\n",
"    patience : int\n",
"        Early-stopping patience, in epochs\n",
"\n",
"    Returns:\n",
"    --------\n",
"    History object returned by model.fit\n",
"\n",
"    Raises:\n",
"    -------\n",
"    Exception\n",
"        Any error raised while training is re-raised after diagnostics are printed\n",
"    \"\"\"\n",
"    # Binary targets for the classification head: 1.0 where the target is non-zero\n",
"    y_train_binary = (y_train > 0).astype(float)\n",
"    y_test_binary = (y_test > 0).astype(float)\n",
"\n",
"    # Training targets dictionary - keys are the exact output names of the model\n",
"    train_targets = {\n",
"        'classification_output': y_train_binary,\n",
"        'regression_output': y_train,\n",
"        'final_output': y_train\n",
"    }\n",
"\n",
"    # Validation targets dictionary\n",
"    test_targets = {\n",
"        'classification_output': y_test_binary,\n",
"        'regression_output': y_test,\n",
"        'final_output': y_test\n",
"    }\n",
"\n",
"    def evaluate_epoch(epoch, logs):\n",
"        # `epoch` is zero-based, so the report is labelled epoch + 1\n",
"        if eval_every and epoch % eval_every == 0:\n",
"            print(f\"\\nEpoch {epoch + 1} Detailed Metrics:\")\n",
"            # assumes predict() returns the three outputs in the order\n",
"            # (classification, regression, final) - TODO confirm against the model\n",
"            predictions = model.predict(X_test, verbose=0)\n",
"            calculate_metrics(y_test, *predictions, min_output, max_output)\n",
"\n",
"    # NOTE(review): the test split doubles as validation data, so early stopping\n",
"    # and checkpoint selection are driven by the same data as the final report.\n",
"    # No ReduceLROnPlateau callback is registered.\n",
"    callbacks = [\n",
"        tf.keras.callbacks.EarlyStopping(\n",
"            monitor='val_final_output_loss',\n",
"            patience=patience,\n",
"            restore_best_weights=True,\n",
"            mode='min',\n",
"            verbose=1,\n",
"            min_delta=1e-5\n",
"        ),\n",
"        tf.keras.callbacks.ModelCheckpoint(\n",
"            filepath=f'{folder_name}_best_model.h5',\n",
"            monitor='val_final_output_loss',\n",
"            save_best_only=True,\n",
"            mode='min',\n",
"            save_weights_only=True  # weights only, to avoid serialization problems\n",
"        ),\n",
"        tf.keras.callbacks.TensorBoard(\n",
"            log_dir=f'./{folder_name}_logs',\n",
"            histogram_freq=1,\n",
"            write_graph=True,\n",
"            update_freq='epoch'\n",
"        ),\n",
"        tf.keras.callbacks.LambdaCallback(on_epoch_end=evaluate_epoch),\n",
"        tf.keras.callbacks.TerminateOnNaN()\n",
"    ]\n",
"\n",
"    try:\n",
"        history = model.fit(\n",
"            X_train,\n",
"            train_targets,\n",
"            validation_data=(X_test, test_targets),\n",
"            epochs=epochs,\n",
"            batch_size=batch_size,\n",
"            callbacks=callbacks,\n",
"            verbose=1,\n",
"            shuffle=False\n",
"        )\n",
"\n",
"        print(\"\\nTraining completed successfully!\")\n",
"\n",
"        # Final evaluation\n",
"        predictions = model.predict(X_test, verbose=0)\n",
"        calculate_metrics(y_test, *predictions, min_output, max_output)\n",
"\n",
"        return history\n",
"\n",
"    except Exception as e:\n",
"        print(f\"\\nError during training: {str(e)}\")\n",
"        print(\"\\nModel output names:\", [output.name for output in model.outputs])\n",
"        print(\"Training targets keys:\", train_targets.keys())\n",
"        raise\n",
"\n",
"    finally:\n",
"        # Runs on success and on failure alike, before the return/raise completes\n",
"        tf.keras.backend.clear_session()\n",
"\n",
"\n",
"def integrate_predictions(df, predictions, sequence_length=24):\n",
"    \"\"\"\n",
"    Integrates solar energy predictions into the original dataset for pre-2010 data.\n",
"\n",
"    Missing 'solarenergy' values are filled with the final predictions, matched\n",
"    on 'datetime'; existing values are never overwritten. The caller's DataFrame\n",
"    is left untouched.\n",
"\n",
"    Parameters:\n",
"    -----------\n",
"    df : pandas.DataFrame\n",
"        Original dataset with 'datetime' and 'solarenergy' columns\n",
"    predictions : tuple\n",
"        Tuple containing (classification_pred, regression_pred, final_pred)\n",
"        - classification_pred: probability of non-zero values\n",
"        - regression_pred: predicted values (used for non-zero cases)\n",
"        - final_pred: final combined predictions\n",
"    sequence_length : int\n",
"        Sequence length used for predictions\n",
"\n",
"    Returns:\n",
"    --------\n",
"    pandas.DataFrame\n",
"        New dataset with missing solar energy values filled from the predictions;\n",
"        the intermediate prediction columns are dropped before returning\n",
"\n",
"    Raises:\n",
"    -------\n",
"    ValueError\n",
"        If the number of predictions does not match the number of pre-2010 rows\n",
"        left after skipping the first (sequence_length - 1) of them\n",
"    \"\"\"\n",
"    # Build a new frame so the datetime conversion does not leak into the caller's df\n",
"    df = df.assign(datetime=pd.to_datetime(df['datetime']))\n",
"\n",
"    # Identify pre-2010 rows\n",
"    mask_pre_2010 = df['datetime'].dt.year < 2010\n",
"\n",
"    # Unpack predictions as flat arrays\n",
"    classification_pred, regression_pred, final_pred = (\n",
"        np.asarray(pred).flatten() for pred in predictions\n",
"    )\n",
"\n",
"    # Skip the first (sequence_length - 1) pre-2010 rows - presumably they lack a\n",
"    # full input window; confirm against the code that builds the sequences\n",
"    dates_pre_2010 = df.loc[mask_pre_2010, 'datetime'].iloc[sequence_length - 1:]\n",
"\n",
"    expected = len(dates_pre_2010)\n",
"    if not (len(final_pred) == len(classification_pred) == len(regression_pred) == expected):\n",
"        raise ValueError(\n",
"            f\"Expected {expected} predictions for the pre-2010 rows, got \"\n",
"            f\"{len(classification_pred)} classification, {len(regression_pred)} regression \"\n",
"            f\"and {len(final_pred)} final values\"\n",
"        )\n",
"\n",
"    # Create temporary DataFrame with all predictions\n",
"    predictions_df = pd.DataFrame({\n",
"        'datetime': dates_pre_2010.reset_index(drop=True),\n",
"        'solarenergy_predicted': final_pred,\n",
"        'solarenergy_classification': classification_pred,\n",
"        'solarenergy_regression': regression_pred\n",
"    })\n",
"\n",
"    # Merge with original dataset\n",
"    df = df.merge(predictions_df, on='datetime', how='left')\n",
"\n",
"    # Record which rows get filled BEFORE filling: comparing the columns afterwards\n",
"    # would also match rows whose original value happens to equal the prediction\n",
"    mask_filled = df['solarenergy'].isna() & df['solarenergy_predicted'].notna()\n",
"\n",
"    # Update solar energy column where missing\n",
"    df['solarenergy'] = df['solarenergy'].fillna(df['solarenergy_predicted'])\n",
"\n",
"    # Print detailed statistics\n",
"    print(\"\\nPrediction Integration Statistics:\")\n",
"    print(f\"Added {len(final_pred)} predictions to dataset\")\n",
"    print(f\"Rows with solar energy after integration: {df['solarenergy'].notna().sum()}\")\n",
"\n",
"    # Analyze prediction components for the filled values\n",
"    if mask_filled.any():\n",
"        filled_data = df[mask_filled]\n",
"\n",
"        print(\"\\nFilled Values Analysis:\")\n",
"        print(f\"Zero predictions (classification < 0.5): {(filled_data['solarenergy_classification'] < 0.5).sum()}\")\n",
"        print(f\"Non-zero predictions (classification >= 0.5): {(filled_data['solarenergy_classification'] >= 0.5).sum()}\")\n",
"\n",
"        # Distribution of predicted values\n",
"        non_zero_pred = filled_data[filled_data['solarenergy_predicted'] > 0]\n",
"        if len(non_zero_pred) > 0:\n",
"            print(f\"\\nNon-zero predictions statistics:\")\n",
"            print(f\"Mean: {non_zero_pred['solarenergy_predicted'].mean():.2f}\")\n",
"            print(f\"Median: {non_zero_pred['solarenergy_predicted'].median():.2f}\")\n",
"            print(f\"Std: {non_zero_pred['solarenergy_predicted'].std():.2f}\")\n",
"\n",
"    # The intermediate prediction columns are not part of the returned dataset\n",
"    columns_to_drop = ['solarenergy_predicted', 'solarenergy_classification',\n",
"                       'solarenergy_regression']\n",
"    df = df.drop(columns=columns_to_drop)\n",
"\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b3b0c2e65ddf484",
"metadata": {},
"outputs": [],
"source": [
"def analyze_distribution(data, solar_column='solarenergy', name = 'Solar Energy'):\n",
"    \"\"\"\n",
"    Analyze in detail the distribution of a numeric column (solar energy by default).\n",
"\n",
"    Shows a histogram, box plot, Q-Q plot and log-transformed histogram; when a\n",
"    'timestamp' or 'datetime' column exists it also shows the time series and the\n",
"    monthly means. Prints the statistics and normalization hints. The input\n",
"    DataFrame is not modified.\n",
"\n",
"    Parameters:\n",
"    -----------\n",
"    data : pandas.DataFrame\n",
"        DataFrame containing the column to analyze\n",
"    solar_column : str, default='solarenergy'\n",
"        Name of the column to analyze\n",
"    name : str, default='Solar Energy'\n",
"        Human-readable label used in plot titles and printed messages\n",
"\n",
"    Returns:\n",
"    --------\n",
"    dict\n",
"        Dictionary containing the main statistics\n",
"    \"\"\"\n",
"    values = data[solar_column]\n",
"    observed = values.dropna()\n",
"\n",
"    # Figure holding the four distribution subplots\n",
"    plt.figure(figsize=(20, 12))\n",
"\n",
"    # 1. Basic statistics\n",
"    stats_dict = {\n",
"        'count': len(values),\n",
"        'missing': values.isnull().sum(),\n",
"        'zeros': (values == 0).sum(),\n",
"        'mean': values.mean(),\n",
"        'median': values.median(),\n",
"        'std': values.std(),\n",
"        'min': values.min(),\n",
"        'max': values.max(),\n",
"        'skewness': stats.skew(observed),\n",
"        'kurtosis': stats.kurtosis(observed)\n",
"    }\n",
"\n",
"    # Percentiles\n",
"    percentiles = [1, 5, 10, 25, 50, 75, 90, 95, 99]\n",
"    for p in percentiles:\n",
"        stats_dict[f'percentile_{p}'] = np.percentile(observed, p)\n",
"\n",
"    # 2. Visualizations\n",
"\n",
"    # 2.1 Distribution\n",
"    plt.subplot(2, 2, 1)\n",
"    sns.histplot(data=data, x=solar_column, kde=True)\n",
"    plt.title(f'Distribuzione di {name}')\n",
"    plt.xlabel(f'{name}')\n",
"    plt.ylabel('Frequenza')\n",
"\n",
"    # 2.2 Box plot\n",
"    plt.subplot(2, 2, 2)\n",
"    sns.boxplot(y=values)\n",
"    plt.title(f'Box Plot di {name}')\n",
"\n",
"    # 2.3 Q-Q plot against the normal distribution\n",
"    plt.subplot(2, 2, 3)\n",
"    stats.probplot(observed, dist=\"norm\", plot=plt)\n",
"    plt.title(f'Q-Q Plot di {name}')\n",
"\n",
"    # 2.4 Log-transformed distribution\n",
"    plt.subplot(2, 2, 4)\n",
"    sns.histplot(data=np.log1p(values), kde=True)\n",
"    plt.title(f'Distribuzione Log-trasformata di {name}')\n",
"    plt.xlabel(f'Log({name} + 1)')\n",
"    plt.ylabel('Frequenza')\n",
"\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
"\n",
"    # 3. Temporal analysis, if a time column is available ('timestamp' wins)\n",
"    time_col = next((col for col in ('timestamp', 'datetime') if col in data.columns), None)\n",
"    if time_col is not None:\n",
"        # Numeric columns are read as epoch seconds. The dtype check also covers\n",
"        # NumPy integers, which are not instances of the built-in int.\n",
"        if pd.api.types.is_numeric_dtype(data[time_col]):\n",
"            time_index = pd.to_datetime(data[time_col], unit='s')\n",
"        else:\n",
"            time_index = pd.to_datetime(data[time_col])\n",
"\n",
"        # Time series plot\n",
"        plt.figure(figsize=(15, 6))\n",
"        plt.plot(time_index, values)\n",
"        plt.title(f'Serie Temporale di {name}')\n",
"        plt.xlabel('Data')\n",
"        plt.ylabel(f'{name}')\n",
"        plt.xticks(rotation=45)\n",
"        plt.tight_layout()\n",
"        plt.show()\n",
"\n",
"        # Seasonal analysis; grouping by a derived Series avoids writing helper\n",
"        # columns (and clobbering an existing 'month') in the caller's DataFrame\n",
"        month = time_index.dt.month.rename('month')\n",
"        seasonal_stats = values.groupby(month).agg(['mean', 'std', 'median'])\n",
"\n",
"        plt.figure(figsize=(12, 6))\n",
"        seasonal_stats['mean'].plot(kind='bar')\n",
"        plt.title(f'Media Mensile di {name}')\n",
"        plt.xlabel('Mese')\n",
"        plt.ylabel(f'{name} Media')\n",
"        plt.tight_layout()\n",
"        plt.show()\n",
"\n",
"    # 4. Print the main statistics\n",
"    print(f\"\\nStatistiche principali di {name}:\")\n",
"    print(\"-\" * 50)\n",
"    for key, value in stats_dict.items():\n",
"        print(f\"{key:15}: {value:,.4f}\")\n",
"\n",
"    # 5. Normalization hints\n",
"    print(\"\\nSuggerimenti per la normalizzazione:\")\n",
"    print(\"-\" * 50)\n",
"\n",
"    skewness = abs(stats_dict['skewness'])\n",
"    if skewness > 1:\n",
"        print(\"- La distribuzione è fortemente asimmetrica (skewness > 1)\")\n",
"        print(\"- Considerare una trasformazione logaritmica: np.log1p(x)\")\n",
"\n",
"    # Skip the ratio when std is zero or NaN (constant or near-empty column)\n",
"    if stats_dict['std'] > 0:\n",
"        range_ratio = stats_dict['max'] / stats_dict['std']\n",
"        if range_ratio > 10:\n",
"            print(\"- La variabile ha una scala molto ampia\")\n",
"            print(\"- Considerare RobustScaler o StandardScaler per la normalizzazione\")\n",
"\n",
"    # Skip the ratio for an empty column\n",
"    if stats_dict['count'] > 0:\n",
"        zero_ratio = stats_dict['zeros'] / stats_dict['count']\n",
"        if zero_ratio > 0.1:\n",
"            print(f\"- Alta presenza di zeri ({zero_ratio:.2%})\")\n",
"            print(\"- Considerare un modello in due parti: classificazione degli zeri + regressione sui valori non-zero\")\n",
"\n",
"    return stats_dict"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1b1ee91d1573ec66",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Initializing solar energy model training...\n",
"\n",
"1. Preparing data...\n",
"\n",
"Selected features:\n",
"Number of features: 66\n",
"Features list: ['uvindex', 'cloudcover', 'visibility', 'temp', 'pressure', 'humidity', 'solarradiation', 'solar_elevation', 'solar_angle', 'day_length', 'hour_sin', 'hour_cos', 'day_of_year_sin', 'day_of_year_cos', 'month_sin', 'month_cos', 'solar_noon', 'daylight_correction', 'clear_sky_index', 'atmospheric_attenuation', 'theoretical_radiation', 'expected_radiation', 'cloud_elevation', 'visibility_elevation', 'uv_cloud_interaction', 'temp_radiation_potential', 'air_mass_index', 'atmospheric_stability', 'vapor_pressure_deficit', 'diffusion_index', 'atmospheric_transmittance', 'temp_humidity_interaction', 'clear_sky_factor', 'cloud_rolling_12h', 'temp_rolling_12h', 'uv_rolling_12h', 'cloudcover_rolling_mean_6h', 'temp_rolling_mean_6h', 'energy_rolling_mean_6h', 'uv_rolling_mean_6h', 'energy_volatility', 'uv_volatility', 'temp_1h_lag', 'cloudcover_1h_lag', 'humidity_1h_lag', 'energy_lag_1h', 'uv_lag_1h', 'temp_losses', 'soiling_loss_factor', 'estimated_efficiency', 'production_potential', 'system_performance_ratio', 'conversion_efficiency_ratio', 'clear_sky_duration', 'weather_variability_index', 'temp_stability', 'humidity_stability', 'cloudcover_stability', 'season_Spring', 'season_Summer', 'season_Autumn', 'season_Winter', 'time_period_Morning', 'time_period_Afternoon', 'time_period_Evening', 'time_period_Night']\n",
"Training data shape: (112882, 24, 66)\n",
"Test data shape: (16849, 24, 66)\n",
"Saving scaler X to: 2024-11-27_23-17_scale_X.joblib\n",
"Saving scaler X to: 2024-11-27_23-17_scale_y.joblib\n",
"Saving features to: 2024-11-27_23-17_features.json\n"
]
}
],
"source": [
"df = pd.read_parquet('../../sources/weather_data_solarradiation.parquet')\n",
"\n",
"print(\"Initializing solar energy model training...\")\n",
"\n",
"# Data preparation\n",
"print(\"\\n1. Preparing data...\")\n",
"X_train_seq, X_test_seq, y_train, y_test, scaler_X, scaler_y, features, X_to_predict_seq = prepare_hybrid_data(df)\n",
"\n",
"print(f\"Training data shape: {X_train_seq.shape}\")\n",
"print(f\"Test data shape: {X_test_seq.shape}\")\n",
"\n",
"# Artifact paths, all sharing the run prefix `folder_name`\n",
"scaler_X_path = f'{folder_name}_scale_X.joblib'\n",
"scaler_y_path = f'{folder_name}_scale_y.joblib'\n",
"features_path = f'{folder_name}_features.json'\n",
"model_path = f'{folder_name}_best_model.h5'\n",
"history_path = f'{folder_name}_training_history.json'\n",
"\n",
"# Save or load scalers and features: an artifact already on disk replaces the\n",
"# freshly built one, otherwise the fresh one is persisted.\n",
"# NOTE(review): the sequences above were presumably scaled inside\n",
"# prepare_hybrid_data with the freshly fitted scalers; confirm that a persisted\n",
"# scaler under the same prefix is always identical to them.\n",
"# joblib.load unpickles the file: only load artifacts written by this project.\n",
"if os.path.exists(scaler_X_path):\n",
"    print(f\"Loading existing scaler X from: {scaler_X_path}\")\n",
"    scaler_X = joblib.load(scaler_X_path)\n",
"else:\n",
"    print(f\"Saving scaler X to: {scaler_X_path}\")\n",
"    joblib.dump(scaler_X, scaler_X_path)\n",
"\n",
"if os.path.exists(scaler_y_path):\n",
"    print(f\"Loading existing scaler y from: {scaler_y_path}\")\n",
"    scaler_y = joblib.load(scaler_y_path)\n",
"else:\n",
"    print(f\"Saving scaler y to: {scaler_y_path}\")\n",
"    joblib.dump(scaler_y, scaler_y_path)\n",
"\n",
"if os.path.exists(features_path):\n",
"    print(f\"Loading existing features from: {features_path}\")\n",
"    with open(features_path, 'r') as f:\n",
"        features = json.load(f)\n",
"else:\n",
"    print(f\"Saving features to: {features_path}\")\n",
"    with open(features_path, 'w') as f:\n",
"        json.dump(features, f)\n",
"\n",
"# Data quality verification\n",
"if np.isnan(X_train_seq).any() or np.isnan(y_train).any():\n",
"    raise ValueError(\"Found NaN values in training data\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "096e79e3-7a3d-4e17-9a30-4d0747ee2d40",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"2. Creating model...\n",
"\\Min dataset solar energy : 0.0 - Scaled Version : 0.0\n",
"\n",
"Max dataset solar energy : 4.0 - Scaled Version : 3.3333333333333335\n",
"Max dataset solar energy increased by 8% : 4.32 - Scaled Version : 3.6000000000000005\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-11-27 23:18:54.766545: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1886] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 43404 MB memory: -> device: 0, name: NVIDIA L40, pci bus id: 0000:c1:00.0, compute capability: 8.9\n",
"2024-11-27 23:18:55.999926: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Class distribution in training set:\n",
"Zeros: 56899 (50.41%)\n",
"Non-zeros: 55983 (49.59%)\n",
"\n",
"Class distribution in test set:\n",
"Zeros: 8576 (50.90%)\n",
"Non-zeros: 8273 (49.10%)\n",
"\n",
"Model output names: ['classification_output', 'regression_output', 'final_output']\n",
"\n",
"4. Starting training...\n",
"Epoch 1/150\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-11-27 23:19:24.436497: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8905\n",
"2024-11-27 23:19:24.593649: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n",
"2024-11-27 23:19:26.676664: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x237e6dc0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
"2024-11-27 23:19:26.676699: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): NVIDIA L40, Compute Capability 8.9\n",
"2024-11-27 23:19:26.682750: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
"2024-11-27 23:19:26.852932: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"221/221 [==============================] - ETA: 0s - loss: 10.1498 - classification_output_loss: 0.2192 - regression_output_loss: 0.3883 - final_output_loss: 0.2518\n",
"Epoch 1 Detailed Metrics:\n",
"\n",
"Classification Metrics:\n",
"Accuracy: 95.36%\n",
"AUC-ROC: 0.9917\n",
"\n",
"Confusion Matrix:\n",
"[[8285 291]\n",
" [ 491 7782]]\n",
"\n",
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" Zero 0.9441 0.9661 0.9549 8576\n",
" Non-Zero 0.9640 0.9407 0.9522 8273\n",
"\n",
" accuracy 0.9536 16849\n",
" macro avg 0.9540 0.9534 0.9535 16849\n",
"weighted avg 0.9538 0.9536 0.9536 16849\n",
"\n",
"\n",
"Regression Metrics (non-zero values):\n",
"Out of range: 246 predictions\n",
"MAPE: 56.03%\n",
"Within ±10%: 4.04%\n",
"MAE: 0.66\n",
"RMSE: 0.87\n",
"\n",
"Final Combined Output Metrics:\n",
"Out of range: 0 predictions\n",
"MAPE: 25.95%\n",
"Within ±2%: 48.48%\n",
"Within ±5%: 49.50%\n",
"Within ±10%: 51.42%\n",
"Within ±20%: 55.81%\n",
"MAE: 0.24\n",
"RMSE: 0.45\n",
"221/221 [==============================] - 66s 124ms/step - loss: 10.1498 - classification_output_loss: 0.2192 - regression_output_loss: 0.3883 - final_output_loss: 0.2518 - val_loss: 7.6804 - val_classification_output_loss: 0.2792 - val_regression_output_loss: 0.4849 - val_final_output_loss: 0.2209\n",
"Epoch 2/150\n",
"221/221 [==============================] - 13s 58ms/step - loss: 5.9091 - classification_output_loss: 0.1070 - regression_output_loss: 0.1877 - final_output_loss: 0.1142 - val_loss: 4.7197 - val_classification_output_loss: 0.1352 - val_regression_output_loss: 0.2361 - val_final_output_loss: 0.1195\n",
"Epoch 3/150\n",
"221/221 [==============================] - 14s 64ms/step - loss: 3.9752 - classification_output_loss: 0.0814 - regression_output_loss: 0.1177 - final_output_loss: 0.0640 - val_loss: 3.4943 - val_classification_output_loss: 0.0998 - val_regression_output_loss: 0.1060 - val_final_output_loss: 0.0623\n",
"Epoch 4/150\n",
"221/221 [==============================] - 14s 62ms/step - loss: 3.2835 - classification_output_loss: 0.0751 - regression_output_loss: 0.1008 - final_output_loss: 0.0540 - val_loss: 3.1666 - val_classification_output_loss: 0.0896 - val_regression_output_loss: 0.0793 - val_final_output_loss: 0.0562\n",
"Epoch 5/150\n",
"221/221 [==============================] - 13s 59ms/step - loss: 2.9948 - classification_output_loss: 0.0926 - regression_output_loss: 0.1700 - final_output_loss: 0.1103 - val_loss: 2.3640 - val_classification_output_loss: 0.1197 - val_regression_output_loss: 0.1617 - val_final_output_loss: 0.1375\n",
"Epoch 6/150\n",
"221/221 [==============================] - 14s 61ms/step - loss: 1.7550 - classification_output_loss: 0.0797 - regression_output_loss: 0.1151 - final_output_loss: 0.0827 - val_loss: 1.2843 - val_classification_output_loss: 0.0880 - val_regression_output_loss: 0.0697 - val_final_output_loss: 0.0442\n",
"Epoch 7/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 1.0277 - classification_output_loss: 0.0647 - regression_output_loss: 0.0847 - final_output_loss: 0.0549 - val_loss: 0.8079 - val_classification_output_loss: 0.0836 - val_regression_output_loss: 0.0610 - val_final_output_loss: 0.0438\n",
"Epoch 8/150\n",
"221/221 [==============================] - 13s 59ms/step - loss: 0.6795 - classification_output_loss: 0.0600 - regression_output_loss: 0.0716 - final_output_loss: 0.0498 - val_loss: 0.5649 - val_classification_output_loss: 0.0770 - val_regression_output_loss: 0.0542 - val_final_output_loss: 0.0392\n",
"Epoch 9/150\n",
"221/221 [==============================] - 15s 67ms/step - loss: 0.4970 - classification_output_loss: 0.0545 - regression_output_loss: 0.0634 - final_output_loss: 0.0434 - val_loss: 0.4335 - val_classification_output_loss: 0.0751 - val_regression_output_loss: 0.0452 - val_final_output_loss: 0.0354\n",
"Epoch 10/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.3957 - classification_output_loss: 0.0517 - regression_output_loss: 0.0524 - final_output_loss: 0.0386 - val_loss: 0.3625 - val_classification_output_loss: 0.0749 - val_regression_output_loss: 0.0416 - val_final_output_loss: 0.0325\n",
"Epoch 11/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.3395 - classification_output_loss: 0.0503 - regression_output_loss: 0.0451 - final_output_loss: 0.0335 - val_loss: 0.3256 - val_classification_output_loss: 0.0750 - val_regression_output_loss: 0.0407 - val_final_output_loss: 0.0317\n",
"Epoch 12/150\n",
"221/221 [==============================] - 15s 66ms/step - loss: 0.3114 - classification_output_loss: 0.0509 - regression_output_loss: 0.0411 - final_output_loss: 0.0309 - val_loss: 0.3090 - val_classification_output_loss: 0.0738 - val_regression_output_loss: 0.0406 - val_final_output_loss: 0.0322\n",
"Epoch 13/150\n",
"221/221 [==============================] - 14s 61ms/step - loss: 0.3011 - classification_output_loss: 0.0523 - regression_output_loss: 0.0406 - final_output_loss: 0.0305 - val_loss: 0.2999 - val_classification_output_loss: 0.0677 - val_regression_output_loss: 0.0358 - val_final_output_loss: 0.0293\n",
"Epoch 14/150\n",
"221/221 [==============================] - 13s 60ms/step - loss: 0.3141 - classification_output_loss: 0.0616 - regression_output_loss: 0.0705 - final_output_loss: 0.0576 - val_loss: 0.3864 - val_classification_output_loss: 0.0790 - val_regression_output_loss: 0.2013 - val_final_output_loss: 0.1696\n",
"Epoch 15/150\n",
"221/221 [==============================] - 13s 61ms/step - loss: 0.2690 - classification_output_loss: 0.0643 - regression_output_loss: 0.1000 - final_output_loss: 0.0724 - val_loss: 0.2078 - val_classification_output_loss: 0.0773 - val_regression_output_loss: 0.0603 - val_final_output_loss: 0.0349\n",
"Epoch 16/150\n",
"221/221 [==============================] - 12s 56ms/step - loss: 0.1958 - classification_output_loss: 0.0566 - regression_output_loss: 0.0729 - final_output_loss: 0.0548 - val_loss: 0.1644 - val_classification_output_loss: 0.0686 - val_regression_output_loss: 0.0517 - val_final_output_loss: 0.0378\n",
"Epoch 17/150\n",
"221/221 [==============================] - 13s 59ms/step - loss: 0.1549 - classification_output_loss: 0.0523 - regression_output_loss: 0.0585 - final_output_loss: 0.0489 - val_loss: 0.1353 - val_classification_output_loss: 0.0668 - val_regression_output_loss: 0.0478 - val_final_output_loss: 0.0354\n",
"Epoch 18/150\n",
"221/221 [==============================] - 13s 58ms/step - loss: 0.1323 - classification_output_loss: 0.0503 - regression_output_loss: 0.0551 - final_output_loss: 0.0493 - val_loss: 0.1225 - val_classification_output_loss: 0.0707 - val_regression_output_loss: 0.0496 - val_final_output_loss: 0.0421\n",
"Epoch 19/150\n",
"221/221 [==============================] - 13s 60ms/step - loss: 0.1139 - classification_output_loss: 0.0501 - regression_output_loss: 0.0497 - final_output_loss: 0.0457 - val_loss: 0.1095 - val_classification_output_loss: 0.0744 - val_regression_output_loss: 0.0481 - val_final_output_loss: 0.0386\n",
"Epoch 20/150\n",
"221/221 [==============================] - 13s 58ms/step - loss: 0.0980 - classification_output_loss: 0.0462 - regression_output_loss: 0.0436 - final_output_loss: 0.0403 - val_loss: 0.0943 - val_classification_output_loss: 0.0679 - val_regression_output_loss: 0.0407 - val_final_output_loss: 0.0344\n",
"Epoch 21/150\n",
"221/221 [==============================] - ETA: 0s - loss: 0.0874 - classification_output_loss: 0.0439 - regression_output_loss: 0.0402 - final_output_loss: 0.0375\n",
"Epoch 21 Detailed Metrics:\n",
"\n",
"Classification Metrics:\n",
"Accuracy: 97.16%\n",
"AUC-ROC: 0.9962\n",
"\n",
"Confusion Matrix:\n",
"[[8389 187]\n",
" [ 291 7982]]\n",
"\n",
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" Zero 0.9665 0.9782 0.9723 8576\n",
" Non-Zero 0.9771 0.9648 0.9709 8273\n",
"\n",
" accuracy 0.9716 16849\n",
" macro avg 0.9718 0.9715 0.9716 16849\n",
"weighted avg 0.9717 0.9716 0.9716 16849\n",
"\n",
"\n",
"Regression Metrics (non-zero values):\n",
"Out of range: 26 predictions\n",
"MAPE: 19.29%\n",
"Within ±10%: 44.86%\n",
"MAE: 0.11\n",
"RMSE: 0.14\n",
"\n",
"Final Combined Output Metrics:\n",
"Out of range: 0 predictions\n",
"MAPE: 13.12%\n",
"Within ±2%: 55.12%\n",
"Within ±5%: 62.25%\n",
"Within ±10%: 74.22%\n",
"Within ±20%: 84.48%\n",
"MAE: 0.06\n",
"RMSE: 0.10\n",
"221/221 [==============================] - 20s 91ms/step - loss: 0.0874 - classification_output_loss: 0.0439 - regression_output_loss: 0.0402 - final_output_loss: 0.0375 - val_loss: 0.0881 - val_classification_output_loss: 0.0742 - val_regression_output_loss: 0.0395 - val_final_output_loss: 0.0330\n",
"Epoch 22/150\n",
"221/221 [==============================] - 14s 65ms/step - loss: 0.0800 - classification_output_loss: 0.0425 - regression_output_loss: 0.0390 - final_output_loss: 0.0352 - val_loss: 0.0900 - val_classification_output_loss: 0.0677 - val_regression_output_loss: 0.0532 - val_final_output_loss: 0.0388\n",
"Epoch 23/150\n",
"221/221 [==============================] - 13s 58ms/step - loss: 0.0748 - classification_output_loss: 0.0402 - regression_output_loss: 0.0385 - final_output_loss: 0.0340 - val_loss: 0.0783 - val_classification_output_loss: 0.0639 - val_regression_output_loss: 0.0371 - val_final_output_loss: 0.0365\n",
"Epoch 24/150\n",
"221/221 [==============================] - 13s 59ms/step - loss: 0.0670 - classification_output_loss: 0.0385 - regression_output_loss: 0.0327 - final_output_loss: 0.0290 - val_loss: 0.0738 - val_classification_output_loss: 0.0631 - val_regression_output_loss: 0.0350 - val_final_output_loss: 0.0350\n",
"Epoch 25/150\n",
"221/221 [==============================] - 12s 56ms/step - loss: 0.0620 - classification_output_loss: 0.0378 - regression_output_loss: 0.0294 - final_output_loss: 0.0260 - val_loss: 0.0657 - val_classification_output_loss: 0.0624 - val_regression_output_loss: 0.0286 - val_final_output_loss: 0.0271\n",
"Epoch 26/150\n",
"221/221 [==============================] - 13s 57ms/step - loss: 0.0591 - classification_output_loss: 0.0374 - regression_output_loss: 0.0284 - final_output_loss: 0.0248 - val_loss: 0.0618 - val_classification_output_loss: 0.0628 - val_regression_output_loss: 0.0258 - val_final_output_loss: 0.0240\n",
"Epoch 27/150\n",
"221/221 [==============================] - 13s 59ms/step - loss: 0.0570 - classification_output_loss: 0.0361 - regression_output_loss: 0.0277 - final_output_loss: 0.0243 - val_loss: 0.0591 - val_classification_output_loss: 0.0622 - val_regression_output_loss: 0.0257 - val_final_output_loss: 0.0203\n",
"Epoch 28/150\n",
"221/221 [==============================] - 13s 59ms/step - loss: 0.0555 - classification_output_loss: 0.0362 - regression_output_loss: 0.0272 - final_output_loss: 0.0233 - val_loss: 0.0584 - val_classification_output_loss: 0.0615 - val_regression_output_loss: 0.0266 - val_final_output_loss: 0.0198\n",
"Epoch 29/150\n",
"221/221 [==============================] - 13s 60ms/step - loss: 0.0550 - classification_output_loss: 0.0364 - regression_output_loss: 0.0273 - final_output_loss: 0.0231 - val_loss: 0.0588 - val_classification_output_loss: 0.0611 - val_regression_output_loss: 0.0273 - val_final_output_loss: 0.0214\n",
"Epoch 30/150\n",
"221/221 [==============================] - 14s 64ms/step - loss: 0.0548 - classification_output_loss: 0.0375 - regression_output_loss: 0.0272 - final_output_loss: 0.0231 - val_loss: 0.0565 - val_classification_output_loss: 0.0579 - val_regression_output_loss: 0.0247 - val_final_output_loss: 0.0201\n",
"Epoch 31/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0553 - classification_output_loss: 0.0371 - regression_output_loss: 0.0285 - final_output_loss: 0.0236 - val_loss: 0.0548 - val_classification_output_loss: 0.0564 - val_regression_output_loss: 0.0222 - val_final_output_loss: 0.0191\n",
"Epoch 32/150\n",
"221/221 [==============================] - 13s 58ms/step - loss: 0.0793 - classification_output_loss: 0.0410 - regression_output_loss: 0.0607 - final_output_loss: 0.0465 - val_loss: 0.2093 - val_classification_output_loss: 0.1111 - val_regression_output_loss: 0.1922 - val_final_output_loss: 0.1775\n",
"Epoch 33/150\n",
"221/221 [==============================] - 14s 65ms/step - loss: 0.1067 - classification_output_loss: 0.0635 - regression_output_loss: 0.0839 - final_output_loss: 0.0643 - val_loss: 0.0728 - val_classification_output_loss: 0.0623 - val_regression_output_loss: 0.0473 - val_final_output_loss: 0.0327\n",
"Epoch 34/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0784 - classification_output_loss: 0.0467 - regression_output_loss: 0.0531 - final_output_loss: 0.0493 - val_loss: 0.0785 - val_classification_output_loss: 0.0949 - val_regression_output_loss: 0.0493 - val_final_output_loss: 0.0359\n",
"Epoch 35/150\n",
"221/221 [==============================] - 14s 62ms/step - loss: 0.0675 - classification_output_loss: 0.0457 - regression_output_loss: 0.0424 - final_output_loss: 0.0420 - val_loss: 0.0692 - val_classification_output_loss: 0.0691 - val_regression_output_loss: 0.0519 - val_final_output_loss: 0.0288\n",
"Epoch 36/150\n",
"221/221 [==============================] - 15s 66ms/step - loss: 0.0676 - classification_output_loss: 0.0418 - regression_output_loss: 0.0452 - final_output_loss: 0.0455 - val_loss: 0.0689 - val_classification_output_loss: 0.0829 - val_regression_output_loss: 0.0430 - val_final_output_loss: 0.0324\n",
"Epoch 37/150\n",
"221/221 [==============================] - 12s 56ms/step - loss: 0.0595 - classification_output_loss: 0.0396 - regression_output_loss: 0.0376 - final_output_loss: 0.0386 - val_loss: 0.0798 - val_classification_output_loss: 0.0626 - val_regression_output_loss: 0.0699 - val_final_output_loss: 0.0473\n",
"Epoch 38/150\n",
"221/221 [==============================] - 13s 57ms/step - loss: 0.0606 - classification_output_loss: 0.0404 - regression_output_loss: 0.0414 - final_output_loss: 0.0402 - val_loss: 0.0661 - val_classification_output_loss: 0.0571 - val_regression_output_loss: 0.0558 - val_final_output_loss: 0.0315\n",
"Epoch 39/150\n",
"221/221 [==============================] - 14s 62ms/step - loss: 0.0570 - classification_output_loss: 0.0375 - regression_output_loss: 0.0370 - final_output_loss: 0.0393 - val_loss: 0.0550 - val_classification_output_loss: 0.0546 - val_regression_output_loss: 0.0365 - val_final_output_loss: 0.0288\n",
"Epoch 40/150\n",
"221/221 [==============================] - 13s 61ms/step - loss: 0.0544 - classification_output_loss: 0.0390 - regression_output_loss: 0.0361 - final_output_loss: 0.0359 - val_loss: 0.0600 - val_classification_output_loss: 0.0527 - val_regression_output_loss: 0.0424 - val_final_output_loss: 0.0381\n",
"Epoch 41/150\n",
"221/221 [==============================] - ETA: 0s - loss: 0.0505 - classification_output_loss: 0.0366 - regression_output_loss: 0.0326 - final_output_loss: 0.0335\n",
"Epoch 41 Detailed Metrics:\n",
"\n",
"Classification Metrics:\n",
"Accuracy: 97.79%\n",
"AUC-ROC: 0.9980\n",
"\n",
"Confusion Matrix:\n",
"[[8337 239]\n",
" [ 133 8140]]\n",
"\n",
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" Zero 0.9843 0.9721 0.9782 8576\n",
" Non-Zero 0.9715 0.9839 0.9777 8273\n",
"\n",
" accuracy 0.9779 16849\n",
" macro avg 0.9779 0.9780 0.9779 16849\n",
"weighted avg 0.9780 0.9779 0.9779 16849\n",
"\n",
"\n",
"Regression Metrics (non-zero values):\n",
"Out of range: 66 predictions\n",
"MAPE: 16.65%\n",
"Within ±10%: 48.35%\n",
"MAE: 0.13\n",
"RMSE: 0.19\n",
"\n",
"Final Combined Output Metrics:\n",
"Out of range: 0 predictions\n",
"MAPE: 10.82%\n",
"Within ±2%: 56.88%\n",
"Within ±5%: 64.73%\n",
"Within ±10%: 74.46%\n",
"Within ±20%: 86.63%\n",
"MAE: 0.06\n",
"RMSE: 0.11\n",
"221/221 [==============================] - 20s 89ms/step - loss: 0.0505 - classification_output_loss: 0.0366 - regression_output_loss: 0.0326 - final_output_loss: 0.0335 - val_loss: 0.0626 - val_classification_output_loss: 0.0581 - val_regression_output_loss: 0.0524 - val_final_output_loss: 0.0347\n",
"Epoch 42/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0519 - classification_output_loss: 0.0342 - regression_output_loss: 0.0354 - final_output_loss: 0.0366 - val_loss: 0.0468 - val_classification_output_loss: 0.0514 - val_regression_output_loss: 0.0282 - val_final_output_loss: 0.0241\n",
"Epoch 43/150\n",
"221/221 [==============================] - 12s 56ms/step - loss: 0.0489 - classification_output_loss: 0.0327 - regression_output_loss: 0.0326 - final_output_loss: 0.0343 - val_loss: 0.0487 - val_classification_output_loss: 0.0563 - val_regression_output_loss: 0.0302 - val_final_output_loss: 0.0271\n",
"Epoch 44/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0477 - classification_output_loss: 0.0337 - regression_output_loss: 0.0313 - final_output_loss: 0.0340 - val_loss: 0.0483 - val_classification_output_loss: 0.0535 - val_regression_output_loss: 0.0292 - val_final_output_loss: 0.0297\n",
"Epoch 45/150\n",
"221/221 [==============================] - 14s 65ms/step - loss: 0.0455 - classification_output_loss: 0.0308 - regression_output_loss: 0.0296 - final_output_loss: 0.0330 - val_loss: 0.0433 - val_classification_output_loss: 0.0494 - val_regression_output_loss: 0.0274 - val_final_output_loss: 0.0220\n",
"Epoch 46/150\n",
"221/221 [==============================] - 13s 61ms/step - loss: 0.0433 - classification_output_loss: 0.0298 - regression_output_loss: 0.0286 - final_output_loss: 0.0304 - val_loss: 0.0455 - val_classification_output_loss: 0.0634 - val_regression_output_loss: 0.0265 - val_final_output_loss: 0.0224\n",
"Epoch 47/150\n",
"221/221 [==============================] - 13s 57ms/step - loss: 0.0413 - classification_output_loss: 0.0300 - regression_output_loss: 0.0274 - final_output_loss: 0.0281 - val_loss: 0.0418 - val_classification_output_loss: 0.0464 - val_regression_output_loss: 0.0273 - val_final_output_loss: 0.0227\n",
"Epoch 48/150\n",
"221/221 [==============================] - 13s 59ms/step - loss: 0.0418 - classification_output_loss: 0.0295 - regression_output_loss: 0.0282 - final_output_loss: 0.0301 - val_loss: 0.0518 - val_classification_output_loss: 0.0546 - val_regression_output_loss: 0.0372 - val_final_output_loss: 0.0337\n",
"Epoch 49/150\n",
"221/221 [==============================] - 12s 56ms/step - loss: 0.0404 - classification_output_loss: 0.0272 - regression_output_loss: 0.0272 - final_output_loss: 0.0293 - val_loss: 0.0580 - val_classification_output_loss: 0.0484 - val_regression_output_loss: 0.0416 - val_final_output_loss: 0.0473\n",
"Epoch 50/150\n",
"221/221 [==============================] - 14s 62ms/step - loss: 0.0399 - classification_output_loss: 0.0275 - regression_output_loss: 0.0270 - final_output_loss: 0.0284 - val_loss: 0.0492 - val_classification_output_loss: 0.0514 - val_regression_output_loss: 0.0317 - val_final_output_loss: 0.0357\n",
"Epoch 51/150\n",
"221/221 [==============================] - 13s 61ms/step - loss: 0.0362 - classification_output_loss: 0.0262 - regression_output_loss: 0.0236 - final_output_loss: 0.0246 - val_loss: 0.0476 - val_classification_output_loss: 0.0431 - val_regression_output_loss: 0.0343 - val_final_output_loss: 0.0346\n",
"Epoch 52/150\n",
"221/221 [==============================] - 14s 62ms/step - loss: 0.0351 - classification_output_loss: 0.0258 - regression_output_loss: 0.0231 - final_output_loss: 0.0238 - val_loss: 0.0457 - val_classification_output_loss: 0.0419 - val_regression_output_loss: 0.0328 - val_final_output_loss: 0.0331\n",
"Epoch 53/150\n",
"221/221 [==============================] - 14s 61ms/step - loss: 0.0329 - classification_output_loss: 0.0245 - regression_output_loss: 0.0213 - final_output_loss: 0.0216 - val_loss: 0.0407 - val_classification_output_loss: 0.0418 - val_regression_output_loss: 0.0274 - val_final_output_loss: 0.0273\n",
"Epoch 54/150\n",
"221/221 [==============================] - 14s 62ms/step - loss: 0.0315 - classification_output_loss: 0.0237 - regression_output_loss: 0.0206 - final_output_loss: 0.0203 - val_loss: 0.0371 - val_classification_output_loss: 0.0387 - val_regression_output_loss: 0.0254 - val_final_output_loss: 0.0229\n",
"Epoch 55/150\n",
"221/221 [==============================] - 14s 61ms/step - loss: 0.0311 - classification_output_loss: 0.0225 - regression_output_loss: 0.0206 - final_output_loss: 0.0206 - val_loss: 0.0356 - val_classification_output_loss: 0.0381 - val_regression_output_loss: 0.0235 - val_final_output_loss: 0.0219\n",
"Epoch 56/150\n",
"221/221 [==============================] - 13s 58ms/step - loss: 0.0302 - classification_output_loss: 0.0223 - regression_output_loss: 0.0201 - final_output_loss: 0.0198 - val_loss: 0.0351 - val_classification_output_loss: 0.0411 - val_regression_output_loss: 0.0224 - val_final_output_loss: 0.0207\n",
"Epoch 57/150\n",
"221/221 [==============================] - 13s 57ms/step - loss: 0.0301 - classification_output_loss: 0.0221 - regression_output_loss: 0.0199 - final_output_loss: 0.0201 - val_loss: 0.0340 - val_classification_output_loss: 0.0393 - val_regression_output_loss: 0.0215 - val_final_output_loss: 0.0205\n",
"Epoch 58/150\n",
"221/221 [==============================] - 13s 59ms/step - loss: 0.0296 - classification_output_loss: 0.0213 - regression_output_loss: 0.0199 - final_output_loss: 0.0197 - val_loss: 0.0326 - val_classification_output_loss: 0.0389 - val_regression_output_loss: 0.0204 - val_final_output_loss: 0.0186\n",
"Epoch 59/150\n",
"221/221 [==============================] - 13s 58ms/step - loss: 0.0296 - classification_output_loss: 0.0210 - regression_output_loss: 0.0200 - final_output_loss: 0.0200 - val_loss: 0.0311 - val_classification_output_loss: 0.0367 - val_regression_output_loss: 0.0206 - val_final_output_loss: 0.0161\n",
"Epoch 60/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0295 - classification_output_loss: 0.0211 - regression_output_loss: 0.0202 - final_output_loss: 0.0198 - val_loss: 0.0315 - val_classification_output_loss: 0.0365 - val_regression_output_loss: 0.0215 - val_final_output_loss: 0.0165\n",
"Epoch 61/150\n",
"221/221 [==============================] - ETA: 0s - loss: 0.0290 - classification_output_loss: 0.0201 - regression_output_loss: 0.0199 - final_output_loss: 0.0195\n",
"Epoch 61 Detailed Metrics:\n",
"\n",
"Classification Metrics:\n",
"Accuracy: 98.60%\n",
"AUC-ROC: 0.9993\n",
"\n",
"Confusion Matrix:\n",
"[[8473 103]\n",
" [ 133 8140]]\n",
"\n",
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" Zero 0.9845 0.9880 0.9863 8576\n",
" Non-Zero 0.9875 0.9839 0.9857 8273\n",
"\n",
" accuracy 0.9860 16849\n",
" macro avg 0.9860 0.9860 0.9860 16849\n",
"weighted avg 0.9860 0.9860 0.9860 16849\n",
"\n",
"\n",
"Regression Metrics (non-zero values):\n",
"Out of range: 0 predictions\n",
"MAPE: 11.30%\n",
"Within ±10%: 73.14%\n",
"MAE: 0.06\n",
"RMSE: 0.09\n",
"\n",
"Final Combined Output Metrics:\n",
"Out of range: 0 predictions\n",
"MAPE: 7.72%\n",
"Within ±2%: 60.84%\n",
"Within ±5%: 74.53%\n",
"Within ±10%: 86.72%\n",
"Within ±20%: 91.58%\n",
"MAE: 0.03\n",
"RMSE: 0.06\n",
"221/221 [==============================] - 20s 90ms/step - loss: 0.0290 - classification_output_loss: 0.0201 - regression_output_loss: 0.0199 - final_output_loss: 0.0195 - val_loss: 0.0315 - val_classification_output_loss: 0.0356 - val_regression_output_loss: 0.0215 - val_final_output_loss: 0.0171\n",
"Epoch 62/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0290 - classification_output_loss: 0.0207 - regression_output_loss: 0.0199 - final_output_loss: 0.0194 - val_loss: 0.0311 - val_classification_output_loss: 0.0355 - val_regression_output_loss: 0.0206 - val_final_output_loss: 0.0172\n",
"Epoch 63/150\n",
"221/221 [==============================] - 14s 62ms/step - loss: 0.0288 - classification_output_loss: 0.0205 - regression_output_loss: 0.0199 - final_output_loss: 0.0192 - val_loss: 0.0308 - val_classification_output_loss: 0.0349 - val_regression_output_loss: 0.0199 - val_final_output_loss: 0.0175\n",
"Epoch 64/150\n",
"221/221 [==============================] - 14s 62ms/step - loss: 0.0289 - classification_output_loss: 0.0207 - regression_output_loss: 0.0200 - final_output_loss: 0.0194 - val_loss: 0.0302 - val_classification_output_loss: 0.0348 - val_regression_output_loss: 0.0191 - val_final_output_loss: 0.0168\n",
"Epoch 65/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0289 - classification_output_loss: 0.0204 - regression_output_loss: 0.0202 - final_output_loss: 0.0194 - val_loss: 0.0297 - val_classification_output_loss: 0.0349 - val_regression_output_loss: 0.0185 - val_final_output_loss: 0.0160\n",
"Epoch 66/150\n",
"221/221 [==============================] - 13s 60ms/step - loss: 0.0295 - classification_output_loss: 0.0209 - regression_output_loss: 0.0211 - final_output_loss: 0.0198 - val_loss: 0.0294 - val_classification_output_loss: 0.0350 - val_regression_output_loss: 0.0180 - val_final_output_loss: 0.0157\n",
"Epoch 67/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0302 - classification_output_loss: 0.0208 - regression_output_loss: 0.0215 - final_output_loss: 0.0210 - val_loss: 0.0303 - val_classification_output_loss: 0.0348 - val_regression_output_loss: 0.0191 - val_final_output_loss: 0.0170\n",
"Epoch 68/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0304 - classification_output_loss: 0.0223 - regression_output_loss: 0.0210 - final_output_loss: 0.0212 - val_loss: 0.0636 - val_classification_output_loss: 0.0548 - val_regression_output_loss: 0.0283 - val_final_output_loss: 0.0759\n",
"Epoch 69/150\n",
"221/221 [==============================] - 13s 60ms/step - loss: 0.0798 - classification_output_loss: 0.0495 - regression_output_loss: 0.0662 - final_output_loss: 0.0655 - val_loss: 0.0591 - val_classification_output_loss: 0.0509 - val_regression_output_loss: 0.0539 - val_final_output_loss: 0.0388\n",
"Epoch 70/150\n",
"221/221 [==============================] - 12s 56ms/step - loss: 0.0506 - classification_output_loss: 0.0340 - regression_output_loss: 0.0369 - final_output_loss: 0.0415 - val_loss: 0.0465 - val_classification_output_loss: 0.0452 - val_regression_output_loss: 0.0398 - val_final_output_loss: 0.0249\n",
"Epoch 71/150\n",
"221/221 [==============================] - 13s 58ms/step - loss: 0.0450 - classification_output_loss: 0.0282 - regression_output_loss: 0.0332 - final_output_loss: 0.0362 - val_loss: 0.0431 - val_classification_output_loss: 0.0442 - val_regression_output_loss: 0.0316 - val_final_output_loss: 0.0284\n",
"Epoch 72/150\n",
"221/221 [==============================] - 13s 59ms/step - loss: 0.0425 - classification_output_loss: 0.0302 - regression_output_loss: 0.0303 - final_output_loss: 0.0330 - val_loss: 0.0478 - val_classification_output_loss: 0.0484 - val_regression_output_loss: 0.0391 - val_final_output_loss: 0.0306\n",
"Epoch 73/150\n",
"221/221 [==============================] - 13s 58ms/step - loss: 0.0413 - classification_output_loss: 0.0268 - regression_output_loss: 0.0300 - final_output_loss: 0.0335 - val_loss: 0.0437 - val_classification_output_loss: 0.0455 - val_regression_output_loss: 0.0275 - val_final_output_loss: 0.0344\n",
"Epoch 74/150\n",
"221/221 [==============================] - 13s 61ms/step - loss: 0.0429 - classification_output_loss: 0.0309 - regression_output_loss: 0.0297 - final_output_loss: 0.0353 - val_loss: 0.0438 - val_classification_output_loss: 0.0651 - val_regression_output_loss: 0.0286 - val_final_output_loss: 0.0228\n",
"Epoch 75/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0391 - classification_output_loss: 0.0249 - regression_output_loss: 0.0278 - final_output_loss: 0.0318 - val_loss: 0.0420 - val_classification_output_loss: 0.0521 - val_regression_output_loss: 0.0279 - val_final_output_loss: 0.0266\n",
"Epoch 76/150\n",
"221/221 [==============================] - 13s 58ms/step - loss: 0.0378 - classification_output_loss: 0.0254 - regression_output_loss: 0.0252 - final_output_loss: 0.0311 - val_loss: 0.0443 - val_classification_output_loss: 0.0531 - val_regression_output_loss: 0.0255 - val_final_output_loss: 0.0357\n",
"Epoch 77/150\n",
"221/221 [==============================] - 13s 60ms/step - loss: 0.0387 - classification_output_loss: 0.0283 - regression_output_loss: 0.0267 - final_output_loss: 0.0322 - val_loss: 0.0744 - val_classification_output_loss: 0.0440 - val_regression_output_loss: 0.0526 - val_final_output_loss: 0.0837\n",
"Epoch 78/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0428 - classification_output_loss: 0.0288 - regression_output_loss: 0.0317 - final_output_loss: 0.0347 - val_loss: 0.0552 - val_classification_output_loss: 0.0460 - val_regression_output_loss: 0.0467 - val_final_output_loss: 0.0405\n",
"Epoch 79/150\n",
"221/221 [==============================] - 14s 65ms/step - loss: 0.0370 - classification_output_loss: 0.0250 - regression_output_loss: 0.0260 - final_output_loss: 0.0290 - val_loss: 0.0362 - val_classification_output_loss: 0.0526 - val_regression_output_loss: 0.0227 - val_final_output_loss: 0.0187\n",
"Epoch 80/150\n",
"221/221 [==============================] - 15s 66ms/step - loss: 0.0367 - classification_output_loss: 0.0248 - regression_output_loss: 0.0252 - final_output_loss: 0.0299 - val_loss: 0.0427 - val_classification_output_loss: 0.0726 - val_regression_output_loss: 0.0270 - val_final_output_loss: 0.0209\n",
"Epoch 81/150\n",
"221/221 [==============================] - ETA: 0s - loss: 0.0363 - classification_output_loss: 0.0254 - regression_output_loss: 0.0261 - final_output_loss: 0.0294\n",
"Epoch 81 Detailed Metrics:\n",
"\n",
"Classification Metrics:\n",
"Accuracy: 98.52%\n",
"AUC-ROC: 0.9992\n",
"\n",
"Confusion Matrix:\n",
"[[8431 145]\n",
" [ 104 8169]]\n",
"\n",
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" Zero 0.9878 0.9831 0.9854 8576\n",
" Non-Zero 0.9826 0.9874 0.9850 8273\n",
"\n",
" accuracy 0.9852 16849\n",
" macro avg 0.9852 0.9853 0.9852 16849\n",
"weighted avg 0.9852 0.9852 0.9852 16849\n",
"\n",
"\n",
"Regression Metrics (non-zero values):\n",
"Out of range: 18 predictions\n",
"MAPE: 17.42%\n",
"Within ±10%: 42.09%\n",
"MAE: 0.15\n",
"RMSE: 0.21\n",
"\n",
"Final Combined Output Metrics:\n",
"Out of range: 0 predictions\n",
"MAPE: 13.33%\n",
"Within ±2%: 53.80%\n",
"Within ±5%: 59.62%\n",
"Within ±10%: 68.52%\n",
"Within ±20%: 80.93%\n",
"MAE: 0.08\n",
"RMSE: 0.14\n",
"221/221 [==============================] - 20s 90ms/step - loss: 0.0363 - classification_output_loss: 0.0254 - regression_output_loss: 0.0261 - final_output_loss: 0.0294 - val_loss: 0.0601 - val_classification_output_loss: 0.0380 - val_regression_output_loss: 0.0604 - val_final_output_loss: 0.0479\n",
"Epoch 82/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0396 - classification_output_loss: 0.0282 - regression_output_loss: 0.0283 - final_output_loss: 0.0328 - val_loss: 0.0370 - val_classification_output_loss: 0.0409 - val_regression_output_loss: 0.0238 - val_final_output_loss: 0.0237\n",
"Epoch 83/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0357 - classification_output_loss: 0.0229 - regression_output_loss: 0.0256 - final_output_loss: 0.0287 - val_loss: 0.0380 - val_classification_output_loss: 0.0534 - val_regression_output_loss: 0.0252 - val_final_output_loss: 0.0216\n",
"Epoch 84/150\n",
"221/221 [==============================] - 14s 62ms/step - loss: 0.0337 - classification_output_loss: 0.0232 - regression_output_loss: 0.0235 - final_output_loss: 0.0272 - val_loss: 0.0497 - val_classification_output_loss: 0.0303 - val_regression_output_loss: 0.0465 - val_final_output_loss: 0.0407\n",
"Epoch 85/150\n",
"221/221 [==============================] - 15s 66ms/step - loss: 0.0380 - classification_output_loss: 0.0252 - regression_output_loss: 0.0267 - final_output_loss: 0.0329 - val_loss: 0.0559 - val_classification_output_loss: 0.0405 - val_regression_output_loss: 0.0447 - val_final_output_loss: 0.0485\n",
"Epoch 86/150\n",
"221/221 [==============================] - 13s 61ms/step - loss: 0.0339 - classification_output_loss: 0.0219 - regression_output_loss: 0.0249 - final_output_loss: 0.0265 - val_loss: 0.0419 - val_classification_output_loss: 0.0481 - val_regression_output_loss: 0.0285 - val_final_output_loss: 0.0306\n",
"Epoch 87/150\n",
"221/221 [==============================] - 14s 65ms/step - loss: 0.0327 - classification_output_loss: 0.0218 - regression_output_loss: 0.0230 - final_output_loss: 0.0265 - val_loss: 0.0339 - val_classification_output_loss: 0.0380 - val_regression_output_loss: 0.0253 - val_final_output_loss: 0.0204\n",
"Epoch 88/150\n",
"221/221 [==============================] - 13s 59ms/step - loss: 0.0328 - classification_output_loss: 0.0223 - regression_output_loss: 0.0236 - final_output_loss: 0.0267 - val_loss: 0.0476 - val_classification_output_loss: 0.0404 - val_regression_output_loss: 0.0346 - val_final_output_loss: 0.0431\n",
"Epoch 89/150\n",
"221/221 [==============================] - 13s 61ms/step - loss: 0.0349 - classification_output_loss: 0.0226 - regression_output_loss: 0.0249 - final_output_loss: 0.0295 - val_loss: 0.0416 - val_classification_output_loss: 0.0428 - val_regression_output_loss: 0.0297 - val_final_output_loss: 0.0298\n",
"Epoch 90/150\n",
"221/221 [==============================] - 13s 60ms/step - loss: 0.0321 - classification_output_loss: 0.0202 - regression_output_loss: 0.0225 - final_output_loss: 0.0262 - val_loss: 0.0324 - val_classification_output_loss: 0.0381 - val_regression_output_loss: 0.0226 - val_final_output_loss: 0.0197\n",
"Epoch 91/150\n",
"221/221 [==============================] - 13s 60ms/step - loss: 0.0307 - classification_output_loss: 0.0208 - regression_output_loss: 0.0223 - final_output_loss: 0.0245 - val_loss: 0.0384 - val_classification_output_loss: 0.0717 - val_regression_output_loss: 0.0236 - val_final_output_loss: 0.0179\n",
"Epoch 92/150\n",
"221/221 [==============================] - 12s 56ms/step - loss: 0.0302 - classification_output_loss: 0.0204 - regression_output_loss: 0.0212 - final_output_loss: 0.0250 - val_loss: 0.0435 - val_classification_output_loss: 0.0330 - val_regression_output_loss: 0.0379 - val_final_output_loss: 0.0356\n",
"Epoch 93/150\n",
"221/221 [==============================] - 13s 59ms/step - loss: 0.0327 - classification_output_loss: 0.0197 - regression_output_loss: 0.0238 - final_output_loss: 0.0283 - val_loss: 0.0357 - val_classification_output_loss: 0.0459 - val_regression_output_loss: 0.0234 - val_final_output_loss: 0.0223\n",
"Epoch 94/150\n",
"221/221 [==============================] - 14s 64ms/step - loss: 0.0300 - classification_output_loss: 0.0179 - regression_output_loss: 0.0221 - final_output_loss: 0.0241 - val_loss: 0.0309 - val_classification_output_loss: 0.0322 - val_regression_output_loss: 0.0219 - val_final_output_loss: 0.0210\n",
"Epoch 95/150\n",
"221/221 [==============================] - 14s 63ms/step - loss: 0.0293 - classification_output_loss: 0.0181 - regression_output_loss: 0.0207 - final_output_loss: 0.0246 - val_loss: 0.0310 - val_classification_output_loss: 0.0385 - val_regression_output_loss: 0.0222 - val_final_output_loss: 0.0183\n",
"Epoch 96/150\n",
"221/221 [==============================] - 13s 58ms/step - loss: 0.0278 - classification_output_loss: 0.0172 - regression_output_loss: 0.0199 - final_output_loss: 0.0227 - val_loss: 0.0361 - val_classification_output_loss: 0.0571 - val_regression_output_loss: 0.0237 - val_final_output_loss: 0.0203\n",
"Epoch 97/150\n",
"221/221 [==============================] - 15s 66ms/step - loss: 0.0295 - classification_output_loss: 0.0197 - regression_output_loss: 0.0209 - final_output_loss: 0.0247 - val_loss: 0.0316 - val_classification_output_loss: 0.0417 - val_regression_output_loss: 0.0214 - val_final_output_loss: 0.0181\n",
"Epoch 98/150\n",
"221/221 [==============================] - 14s 62ms/step - loss: 0.0289 - classification_output_loss: 0.0174 - regression_output_loss: 0.0211 - final_output_loss: 0.0240 - val_loss: 0.0450 - val_classification_output_loss: 0.0319 - val_regression_output_loss: 0.0309 - val_final_output_loss: 0.0451\n",
"Epoch 99/150\n",
"221/221 [==============================] - 13s 61ms/step - loss: 0.0302 - classification_output_loss: 0.0194 - regression_output_loss: 0.0216 - final_output_loss: 0.0255 - val_loss: 0.0351 - val_classification_output_loss: 0.0486 - val_regression_output_loss: 0.0228 - val_final_output_loss: 0.0221\n",
"Epoch 100/150\n",
"221/221 [==============================] - 15s 68ms/step - loss: 0.0268 - classification_output_loss: 0.0169 - regression_output_loss: 0.0194 - final_output_loss: 0.0214 - val_loss: 0.0330 - val_classification_output_loss: 0.0376 - val_regression_output_loss: 0.0208 - val_final_output_loss: 0.0257\n",
"Epoch 101/150\n",
"221/221 [==============================] - ETA: 0s - loss: 0.0261 - classification_output_loss: 0.0137 - regression_output_loss: 0.0188 - final_output_loss: 0.0227Restoring model weights from the end of the best epoch: 66.\n",
"\n",
"Epoch 101 Detailed Metrics:\n",
"\n",
"Classification Metrics:\n",
"Accuracy: 98.65%\n",
"AUC-ROC: 0.9994\n",
"\n",
"Confusion Matrix:\n",
"[[8497 79]\n",
" [ 148 8125]]\n",
"\n",
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" Zero 0.9829 0.9908 0.9868 8576\n",
" Non-Zero 0.9904 0.9821 0.9862 8273\n",
"\n",
" accuracy 0.9865 16849\n",
" macro avg 0.9866 0.9864 0.9865 16849\n",
"weighted avg 0.9866 0.9865 0.9865 16849\n",
"\n",
"\n",
"Regression Metrics (non-zero values):\n",
"Out of range: 0 predictions\n",
"MAPE: 10.76%\n",
"Within ±10%: 75.03%\n",
"MAE: 0.05\n",
"RMSE: 0.07\n",
"\n",
"Final Combined Output Metrics:\n",
"Out of range: 0 predictions\n",
"MAPE: 7.87%\n",
"Within ±2%: 61.66%\n",
"Within ±5%: 75.67%\n",
"Within ±10%: 86.32%\n",
"Within ±20%: 91.11%\n",
"MAE: 0.03\n",
"RMSE: 0.06\n",
"221/221 [==============================] - 20s 92ms/step - loss: 0.0261 - classification_output_loss: 0.0137 - regression_output_loss: 0.0188 - final_output_loss: 0.0227 - val_loss: 0.0359 - val_classification_output_loss: 0.0278 - val_regression_output_loss: 0.0242 - val_final_output_loss: 0.0340\n",
"Epoch 101: early stopping\n",
"\n",
"Training completed successfully!\n",
"\n",
"Classification Metrics:\n",
"Accuracy: 98.65%\n",
"AUC-ROC: 0.9994\n",
"\n",
"Confusion Matrix:\n",
"[[8497 79]\n",
" [ 148 8125]]\n",
"\n",
"Classification Report:\n",
" precision recall f1-score support\n",
"\n",
" Zero 0.9829 0.9908 0.9868 8576\n",
" Non-Zero 0.9904 0.9821 0.9862 8273\n",
"\n",
" accuracy 0.9865 16849\n",
" macro avg 0.9866 0.9864 0.9865 16849\n",
"weighted avg 0.9866 0.9865 0.9865 16849\n",
"\n",
"\n",
"Regression Metrics (non-zero values):\n",
"Out of range: 0 predictions\n",
"MAPE: 10.76%\n",
"Within ±10%: 75.03%\n",
"MAE: 0.05\n",
"RMSE: 0.07\n",
"\n",
"Final Combined Output Metrics:\n",
"Out of range: 0 predictions\n",
"MAPE: 7.87%\n",
"Within ±2%: 61.66%\n",
"Within ±5%: 75.67%\n",
"Within ±10%: 86.32%\n",
"Within ±20%: 91.11%\n",
"MAE: 0.03\n",
"RMSE: 0.06\n"
]
}
],
"source": [
"# Model creation: derive the output bounds from the target column, build the\n",
"# hybrid model, and prepare binary (zero / non-zero) targets for classification.\n",
"print(\"\\n2. Creating model...\")\n",
"# assumes X_train_seq is (samples, timesteps, features) -- TODO confirm\n",
"input_shape = (X_train_seq.shape[1], X_train_seq.shape[2])\n",
"\n",
"min_val = df['solarenergy'].min()\n",
"# NOTE(review): the lower bound is the scaled value of 0, not of min_val; the pair\n",
"# printed below only corresponds when the dataset minimum is 0 -- confirm intended.\n",
"min_val_scaled = scaler_y.transform([[0]])[0][0]\n",
"\n",
"max_val = df['solarenergy'].max()\n",
"max_val_scaled = scaler_y.transform([[max_val]])[0][0]\n",
"\n",
"# Both summary lines start with a newline so each prints as its own paragraph.\n",
"print(f\"\\nMin dataset solar energy : {min_val} - Scaled Version : {min_val_scaled}\")\n",
"\n",
"print(f\"\\nMax dataset solar energy : {max_val} - Scaled Version : {max_val_scaled}\")\n",
"\n",
"# Percent of headroom added on top of the observed maximum before it is passed\n",
"# to the model as max_output.\n",
"increase_percentage = 8\n",
"\n",
"max_val = max_val * (1 + increase_percentage / 100)\n",
"# NOTE(review): multiplying the already-scaled value equals transforming the raised\n",
"# raw value only if scaler_y is a pure multiplicative rescaling (no offset) --\n",
"# verify against how scaler_y was fitted.\n",
"max_val_scaled = max_val_scaled * (1 + increase_percentage / 100)\n",
"\n",
"print(f\"Max dataset solar energy increased by {increase_percentage}% : {max_val} - Scaled Version : {max_val_scaled}\")\n",
"\n",
"# Create the hybrid model\n",
"model = create_solarenergy_model(\n",
"    input_shape=input_shape,\n",
"    folder_name=folder_name,\n",
"    min_output=min_val_scaled,\n",
"    max_output=max_val_scaled\n",
")\n",
"\n",
"# Prepare binary targets for classification: 1.0 where the target is > 0, else 0.0\n",
"y_train_binary = (y_train > 0).astype(float)\n",
"y_test_binary = (y_test > 0).astype(float)\n",
"\n",
"# Report the zero / non-zero balance of each split (count and percentage).\n",
"for _split_name, _split_labels in ((\"training\", y_train_binary), (\"test\", y_test_binary)):\n",
"    print(f\"\\nClass distribution in {_split_name} set:\")\n",
"    print(f\"Zeros: {np.sum(_split_labels == 0)} ({np.mean(_split_labels == 0)*100:.2f}%)\")\n",
"    print(f\"Non-zeros: {np.sum(_split_labels == 1)} ({np.mean(_split_labels == 1)*100:.2f}%)\")\n",
"\n",
"# Get the exact output names from the model (text before the first '/' of each name)\n",
"output_names = [output.name.split('/')[0] for output in model.outputs]\n",
"print(\"\\nModel output names:\", output_names)\n",
"\n",
"print(\"\\n4. Starting training...\")\n",
"history = train_hybrid_model(\n",
" model=model,\n",
" X_train=X_train_seq,\n",
" y_train=y_train,\n",
" X_test=X_test_seq,\n",
" y_test=y_test,\n",
" epochs=150,\n",
" batch_size=512,\n",
" folder_name=folder_name,\n",
" min_output=min_val_scaled,\n",
" max_output=max_val_scaled\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "958d78b99e8898d6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"5. Generating predictions...\n",
"527/527 [==============================] - 6s 10ms/step\n",
"\n",
"6. Evaluating model...\n",
"\n",
"Solar Energy Prediction Metrics:\n",
"\n",
"Absolute Metrics:\n",
"MAE: 0.03 kWh\n",
"RMSE: 0.07 kWh\n",
"R² Score: 0.995\n",
"MAPE: N/A (insufficient data)\n",
"\n",
"Accuracy Metrics:\n",
"Within ±5 kWh: 100.0%\n",
"Within ±10 kWh: 100.0%\n",
"Within ±20 kWh: 100.0%\n",
"\n",
"Level Accuracy:\n",
"Level Accuracy: 97.6%\n",
"\n",
"Confusion Matrix for Energy Levels:\n",
" Low Moderate Very Low\n",
"Low 3539 133 1\n",
"Moderate 26 2082 0\n",
"Very Low 247 0 10821\n",
"\n",
"Plot saved as: 2024-11-27_23-17_energy_analysis.png\n",
"\n",
"Error Statistics:\n",
"Mean error: -0.000\n",
"Error standard deviation: 0.068\n",
"Median error: 0.000\n",
"95th percentile absolute error: 0.137\n"
]
}
],
"source": [
"print(\"\\n5. Generating predictions...\")\n",
"predictions = model.predict(X_test_seq)\n",
"classification_pred, regression_pred, final_pred = predictions\n",
"\n",
"# Inverse transform to get back to the original values\n",
"regression_pred_original = scaler_y.inverse_transform(regression_pred)\n",
"final_pred_original = scaler_y.inverse_transform(final_pred)\n",
"y_test_original = scaler_y.inverse_transform(y_test)\n",
"\n",
"print(\"\\n6. Evaluating model...\")\n",
"# Evaluate the final (combined) predictions\n",
"metrics = evaluate_solarenergy_predictions(y_test_original, final_pred_original, folder_name=folder_name)\n",
"\n",
"# Results dictionary with additional metrics for the hybrid model\n",
"training_results = {\n",
"    'model_params': {\n",
"        'input_shape': input_shape,\n",
"        'n_features': len(features),\n",
"        'sequence_length': X_train_seq.shape[1]\n",
"    },\n",
"    'training_params': {\n",
"        # Fixed: was 192, but train_hybrid_model is called with batch_size=512\n",
"        'batch_size': 512,\n",
"        'total_epochs': len(history.history['loss']),\n",
"        # int(...) so the value is a built-in int like the other entries, not a NumPy integer\n",
"        'best_epoch': int(np.argmin(history.history['val_final_output_loss'])) + 1\n",
"    },\n",
"    'performance_metrics': {\n",
"        'regression': {\n",
"            'final_loss': float(history.history['val_regression_output_loss'][-1]),\n",
"            # Predictions are in scaler space, so the lower bound is min_val_scaled\n",
"            # (the min_output the model was built with) rather than a literal 0\n",
"            'out_of_range_predictions': int(np.sum((regression_pred < min_val_scaled) | (regression_pred > max_val_scaled)))\n",
"        },\n",
"        'final_output': {\n",
"            'final_loss': float(history.history['val_final_output_loss'][-1]),\n",
"            'best_val_loss': float(min(history.history['val_final_output_loss'])),\n",
"            'out_of_range_predictions': int(np.sum((final_pred < min_val_scaled) | (final_pred > max_val_scaled)))\n",
"        }\n",
"    }\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "5c05d1d03336b1e4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"7. Predicting missing data...\n",
"7122/7122 [==============================] - 73s 10ms/step\n",
"\n",
"8. Integrating predictions into original dataset...\n",
"\n",
"Prediction Integration Statistics:\n",
"Added 227879 predictions to dataset\n",
"Rows with solar energy after integration: 357615\n",
"\n",
"Filled Values Analysis:\n",
"Zero predictions (classification < 0.5): 117206\n",
"Non-zero predictions (classification >= 0.5): 110673\n",
"\n",
"Non-zero predictions statistics:\n",
"Mean: 1.10\n",
"Median: 0.93\n",
"Std: 0.95\n",
"\n",
"Prediction Statistics:\n",
"Total predictions added: 227879\n",
"\n",
"Classification Statistics:\n",
"Predicted zeros: 117206 (51.43%)\n",
"Predicted non-zeros: 110673 (48.57%)\n",
"Mean classification confidence: 0.4896\n",
"\n",
"Final Predictions Statistics:\n",
"Mean solar energy: 0.64\n",
"Min solar energy: 0.00\n",
"Max solar energy: 3.30\n",
"Zero predictions: 95673 (41.98%)\n",
"\n",
"Training completed successfully!\n"
]
}
],
"source": [
"print(\"\\n7. Predicting missing data...\")\n",
"to_predict_predictions = model.predict(X_to_predict_seq)\n",
"# Dedicated names: reusing classification_pred / regression_pred / final_pred /\n",
"# final_pred_original here would silently replace the test-set predictions that\n",
"# evaluation code pairs with y_test.\n",
"fill_classification_pred, fill_regression_pred, fill_final_pred = to_predict_predictions\n",
"\n",
"# NOTE(review): clipping of the final predictions is currently disabled (line kept for reference)\n",
"#fill_final_pred = np.clip(fill_final_pred, min_val_scaled, max_val_scaled)\n",
"fill_final_pred_original = scaler_y.inverse_transform(fill_final_pred)\n",
"\n",
"print(\"\\n8. Integrating predictions into original dataset...\")\n",
"# Only the final output is converted back through scaler_y; the classification and\n",
"# regression outputs are passed exactly as returned by the model.\n",
"df_updated = integrate_predictions(\n",
"    df.copy(),\n",
"    predictions=(fill_classification_pred, fill_regression_pred, fill_final_pred_original)\n",
")\n",
"\n",
"df_updated.to_parquet('../../sources/weather_data_solarenergy.parquet')\n",
"\n",
"n_filled = len(fill_final_pred_original)\n",
"\n",
"# Add prediction statistics to training_results\n",
"training_results['prediction_stats'] = {\n",
"    'n_predictions_added': n_filled,\n",
"    'classification_stats': {\n",
"        'predicted_zeros': int(np.sum(fill_classification_pred < 0.5)),\n",
"        'predicted_non_zeros': int(np.sum(fill_classification_pred >= 0.5)),\n",
"        'mean_confidence': float(fill_classification_pred.mean()),\n",
"    },\n",
"    # Computed on the regression output as returned by the model (not inverse-transformed)\n",
"    'regression_stats': {\n",
"        'mean_predicted_value': float(fill_regression_pred.mean()),\n",
"        'min_predicted_value': float(fill_regression_pred.min()),\n",
"        'max_predicted_value': float(fill_regression_pred.max()),\n",
"    },\n",
"    'final_predictions': {\n",
"        'mean_predicted_solarenergy': float(fill_final_pred_original.mean()),\n",
"        'min_predicted_solarenergy': float(fill_final_pred_original.min()),\n",
"        'max_predicted_solarenergy': float(fill_final_pred_original.max()),\n",
"        'zero_predictions': int(np.sum(fill_final_pred_original == 0)),\n",
"        'non_zero_predictions': int(np.sum(fill_final_pred_original > 0)),\n",
"    }\n",
"}\n",
"\n",
"# Short aliases keep the report below readable\n",
"prediction_stats = training_results['prediction_stats']\n",
"classification_stats = prediction_stats['classification_stats']\n",
"final_stats = prediction_stats['final_predictions']\n",
"\n",
"print(\"\\nPrediction Statistics:\")\n",
"print(f\"Total predictions added: {prediction_stats['n_predictions_added']}\")\n",
"print(\"\\nClassification Statistics:\")\n",
"print(f\"Predicted zeros: {classification_stats['predicted_zeros']} \"\n",
"      f\"({classification_stats['predicted_zeros']/n_filled*100:.2f}%)\")\n",
"print(f\"Predicted non-zeros: {classification_stats['predicted_non_zeros']} \"\n",
"      f\"({classification_stats['predicted_non_zeros']/n_filled*100:.2f}%)\")\n",
"print(f\"Mean classification confidence: {classification_stats['mean_confidence']:.4f}\")\n",
"\n",
"print(\"\\nFinal Predictions Statistics:\")\n",
"print(f\"Mean solar energy: {final_stats['mean_predicted_solarenergy']:.2f}\")\n",
"print(f\"Min solar energy: {final_stats['min_predicted_solarenergy']:.2f}\")\n",
"print(f\"Max solar energy: {final_stats['max_predicted_solarenergy']:.2f}\")\n",
"print(f\"Zero predictions: {final_stats['zero_predictions']} \"\n",
"      f\"({final_stats['zero_predictions']/n_filled*100:.2f}%)\")\n",
"\n",
"print(\"\\nTraining completed successfully!\")\n",
"\n",
"tf.keras.backend.clear_session()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "ef29b3ecdf12c6db",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 2000x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1500x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1200x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Statistiche principali di Solar Energy:\n",
"--------------------------------------------------\n",
"count : 357,679.0000\n",
"missing : 64.0000\n",
"zeros : 161,156.0000\n",
"mean : 0.6529\n",
"median : 0.0736\n",
"std : 0.9288\n",
"min : 0.0000\n",
"max : 4.0000\n",
"skewness : 1.2834\n",
"kurtosis : 0.3742\n",
"percentile_1 : 0.0000\n",
"percentile_5 : 0.0000\n",
"percentile_10 : 0.0000\n",
"percentile_25 : 0.0000\n",
"percentile_50 : 0.0736\n",
"percentile_75 : 1.1913\n",
"percentile_90 : 2.2530\n",
"percentile_95 : 2.7314\n",
"percentile_99 : 3.1348\n",
"\n",
"Suggerimenti per la normalizzazione:\n",
"--------------------------------------------------\n",
"- La distribuzione è fortemente asimmetrica (skewness > 1)\n",
"- Considerare una trasformazione logaritmica: np.log1p(x)\n",
"- Alta presenza di zeri (45.06%)\n",
"- Considerare un modello in due parti: classificazione degli zeri + regressione sui valori non-zero\n"
]
},
{
"data": {
"text/plain": [
"{'count': 357679,\n",
" 'missing': 64,\n",
" 'zeros': 161156,\n",
" 'mean': 0.6529324282684227,\n",
" 'median': 0.07359524816274643,\n",
" 'std': 0.928826011992019,\n",
" 'min': 0.0,\n",
" 'max': 4.0,\n",
" 'skewness': 1.2833967112068252,\n",
" 'kurtosis': 0.37419692300276486,\n",
" 'percentile_1': 0.0,\n",
" 'percentile_5': 0.0,\n",
" 'percentile_10': 0.0,\n",
" 'percentile_25': 0.0,\n",
" 'percentile_50': 0.07359524816274643,\n",
" 'percentile_75': 1.191302478313446,\n",
" 'percentile_90': 2.2529743671417237,\n",
" 'percentile_95': 2.7313732862472535,\n",
" 'percentile_99': 3.134775576591491}"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"analyze_distribution(df_updated, 'solarenergy', 'Solar Energy')"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "e884cc287364c4ed",
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "too many values to unpack (expected 3)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[24], line 157\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPredictions within ±\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mthreshold\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mwithin_threshold\u001b[38;5;132;01m:\u001b[39;00m\u001b[38;5;124m.1f\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 156\u001b[0m \u001b[38;5;66;03m# Example usage\u001b[39;00m\n\u001b[0;32m--> 157\u001b[0m \u001b[43mplot_error_analysis\u001b[49m\u001b[43m(\u001b[49m\u001b[43my_test_original\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfinal_pred_original\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfolder_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfolder_name\u001b[49m\u001b[43m)\u001b[49m\n",
"Cell \u001b[0;32mIn[24], line 23\u001b[0m, in \u001b[0;36mplot_error_analysis\u001b[0;34m(y_true, predictions, folder_name)\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m roc_curve\n\u001b[1;32m 22\u001b[0m \u001b[38;5;66;03m# Unpack predictions\u001b[39;00m\n\u001b[0;32m---> 23\u001b[0m classification_pred, regression_pred, final_pred \u001b[38;5;241m=\u001b[39m predictions\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# Convert to 1D numpy arrays if needed\u001b[39;00m\n\u001b[1;32m 26\u001b[0m y_true \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mravel(y_true)\n",
"\u001b[0;31mValueError\u001b[0m: too many values to unpack (expected 3)"
]
}
],
"source": [
"def _print_error_stats(errors):\n",
"    \"\"\"Print MAE, RMSE, mean and standard deviation of a 1D array of errors.\"\"\"\n",
"    print(f\"MAE: {np.mean(np.abs(errors)):.4f}\")\n",
"    print(f\"RMSE: {np.sqrt(np.mean(errors ** 2)):.4f}\")\n",
"    print(f\"Mean error: {np.mean(errors):.4f}\")\n",
"    print(f\"Error std: {np.std(errors):.4f}\")\n",
"\n",
"\n",
"def plot_error_analysis(y_true, predictions, folder_name=None, thresholds=(0.5, 1.0, 1.5, 2.0)):\n",
"    \"\"\"\n",
"    Visualize and summarize prediction errors for the hybrid model\n",
"\n",
"    Parameters:\n",
"    -----------\n",
"    y_true : array-like\n",
"        Actual values, in the same scale as the predictions\n",
"    predictions : tuple\n",
"        Tuple containing (classification_pred, regression_pred, final_pred)\n",
"    folder_name : str, optional\n",
"        Prefix of the saved plot file ('<folder_name>_error_analysis.png').\n",
"        If None, the plot is only displayed\n",
"    thresholds : sequence of float, optional\n",
"        Absolute-error thresholds reported for the final predictions\n",
"\n",
"    Raises:\n",
"    -------\n",
"    ValueError\n",
"        If `predictions` does not unpack into exactly three arrays, or if\n",
"        `y_true` is empty\n",
"\n",
"    Generates:\n",
"    ----------\n",
"    - Classification analysis plots\n",
"    - Regression error analysis plots\n",
"    - Final prediction error analysis plots\n",
"    \"\"\"\n",
"    # Imported locally so the function does not depend on another cell having\n",
"    # imported these sklearn names\n",
"    from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve\n",
"\n",
"    # Unpack predictions, failing with an actionable message when a single\n",
"    # array is passed instead of the three model outputs\n",
"    try:\n",
"        classification_pred, regression_pred, final_pred = predictions\n",
"    except ValueError as exc:\n",
"        raise ValueError(\n",
"            \"predictions must contain exactly three arrays: \"\n",
"            \"(classification_pred, regression_pred, final_pred)\"\n",
"        ) from exc\n",
"\n",
"    # Convert to 1D numpy arrays if needed\n",
"    y_true = np.ravel(y_true)\n",
"    classification_pred = np.ravel(classification_pred)\n",
"    regression_pred = np.ravel(regression_pred)\n",
"    final_pred = np.ravel(final_pred)\n",
"\n",
"    if y_true.size == 0:\n",
"        raise ValueError(\"y_true is empty: nothing to analyze\")\n",
"\n",
"    # Binary ground truth and predicted class. A probability >= 0.5 counts as\n",
"    # non-zero, matching the cut-off used for the prediction statistics in this notebook\n",
"    mask_nonzero = y_true > 0\n",
"    has_nonzero = bool(mask_nonzero.any())\n",
"    y_true_binary = mask_nonzero.astype(int)\n",
"    predicted_binary = (classification_pred >= 0.5).astype(int)\n",
"\n",
"    # ROC / AUC are undefined when y_true contains a single class\n",
"    has_both_classes = np.unique(y_true_binary).size == 2\n",
"    auc = roc_auc_score(y_true_binary, classification_pred) if has_both_classes else None\n",
"\n",
"    # Calculate errors for regression and final predictions\n",
"    regression_errors = regression_pred - y_true\n",
"    final_errors = final_pred - y_true\n",
"\n",
"    fig, axes = plt.subplots(3, 3, figsize=(20, 15))\n",
"\n",
"    # Classification Analysis (Top Row)\n",
"    # Plot 1: Classification Distribution\n",
"    ax = axes[0, 0]\n",
"    ax.hist(classification_pred, bins=50, alpha=0.7)\n",
"    ax.axvline(x=0.5, color='r', linestyle='--')\n",
"    ax.set_title('Classification Probability Distribution')\n",
"    ax.set_xlabel('Classification Probability')\n",
"    ax.set_ylabel('Frequency')\n",
"\n",
"    # Plot 2: ROC Curve\n",
"    ax = axes[0, 1]\n",
"    if has_both_classes:\n",
"        fpr, tpr, _ = roc_curve(y_true_binary, classification_pred)\n",
"        ax.plot(fpr, tpr)\n",
"        ax.set_title(f'ROC Curve (AUC = {auc:.4f})')\n",
"    else:\n",
"        ax.set_title('ROC Curve (undefined: single class in y_true)')\n",
"    ax.plot([0, 1], [0, 1], 'r--')\n",
"    ax.set_xlabel('False Positive Rate')\n",
"    ax.set_ylabel('True Positive Rate')\n",
"\n",
"    # Plot 3: Classification Confusion Matrix (labels fixed so it is always 2x2)\n",
"    ax = axes[0, 2]\n",
"    cm = confusion_matrix(y_true_binary, predicted_binary, labels=[0, 1])\n",
"    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)\n",
"    ax.set_title('Classification Confusion Matrix')\n",
"    ax.set_xlabel('Predicted')\n",
"    ax.set_ylabel('Actual')\n",
"\n",
"    # Regression Analysis (Middle Row) - drawn only on rows with a non-zero target;\n",
"    # the panels stay empty (titles only) when there are none\n",
"    if has_nonzero:\n",
"        nonzero_true = y_true[mask_nonzero]\n",
"        nonzero_pred = regression_pred[mask_nonzero]\n",
"        nonzero_errors = regression_errors[mask_nonzero]\n",
"        diagonal = [nonzero_true.min(), nonzero_true.max()]\n",
"\n",
"        axes[1, 0].hist(nonzero_errors, bins=50, alpha=0.7)\n",
"        axes[1, 1].scatter(nonzero_true, nonzero_pred, alpha=0.5)\n",
"        axes[1, 1].plot(diagonal, diagonal, 'r--', lw=2)\n",
"        axes[1, 2].scatter(nonzero_true, nonzero_errors, alpha=0.5)\n",
"        axes[1, 2].axhline(y=0, color='r', linestyle='--')\n",
"\n",
"    # Plot 4: Regression Error Distribution\n",
"    axes[1, 0].set_title('Regression Error Distribution (Non-zero Values)')\n",
"    axes[1, 0].set_xlabel('Error')\n",
"    axes[1, 0].set_ylabel('Frequency')\n",
"\n",
"    # Plot 5: Actual vs Predicted (Regression)\n",
"    axes[1, 1].set_title('Actual vs Predicted (Regression, Non-zero Values)')\n",
"    axes[1, 1].set_xlabel('Actual Values')\n",
"    axes[1, 1].set_ylabel('Predicted Values')\n",
"\n",
"    # Plot 6: Regression Errors vs Actual Values\n",
"    axes[1, 2].set_title('Regression Errors vs Actual Values (Non-zero Values)')\n",
"    axes[1, 2].set_xlabel('Actual Values')\n",
"    axes[1, 2].set_ylabel('Error')\n",
"\n",
"    # Final Predictions Analysis (Bottom Row)\n",
"    # Plot 7: Final Error Distribution\n",
"    ax = axes[2, 0]\n",
"    ax.hist(final_errors, bins=50, alpha=0.7)\n",
"    ax.set_title('Final Prediction Error Distribution')\n",
"    ax.set_xlabel('Error')\n",
"    ax.set_ylabel('Frequency')\n",
"\n",
"    # Plot 8: Actual vs Predicted (Final)\n",
"    ax = axes[2, 1]\n",
"    full_range = [y_true.min(), y_true.max()]\n",
"    ax.scatter(y_true, final_pred, alpha=0.5)\n",
"    ax.plot(full_range, full_range, 'r--', lw=2)\n",
"    ax.set_title('Actual vs Predicted (Final)')\n",
"    ax.set_xlabel('Actual Values')\n",
"    ax.set_ylabel('Predicted Values')\n",
"\n",
"    # Plot 9: Final Errors vs Actual Values\n",
"    ax = axes[2, 2]\n",
"    ax.scatter(y_true, final_errors, alpha=0.5)\n",
"    ax.axhline(y=0, color='r', linestyle='--')\n",
"    ax.set_title('Final Errors vs Actual Values')\n",
"    ax.set_xlabel('Actual Values')\n",
"    ax.set_ylabel('Error')\n",
"\n",
"    fig.tight_layout()\n",
"\n",
"    # Save plot if a prefix is specified; a failed save is reported but does not\n",
"    # prevent the figure and the statistics from being shown\n",
"    if folder_name is not None:\n",
"        try:\n",
"            filename = f'{folder_name}_error_analysis.png'\n",
"            fig.savefig(filename, dpi=300, bbox_inches='tight')\n",
"            print(f\"\\nPlot saved as: {filename}\")\n",
"        except Exception as e:\n",
"            print(f\"\\nError saving plot: {str(e)}\")\n",
"\n",
"    plt.show()\n",
"\n",
"    # Print comprehensive statistics\n",
"    print(\"\\nClassification Statistics:\")\n",
"    print(classification_report(\n",
"        y_true_binary,\n",
"        predicted_binary,\n",
"        labels=[0, 1],\n",
"        target_names=['Zero', 'Non-Zero'],\n",
"        zero_division=0\n",
"    ))\n",
"    if auc is not None:\n",
"        print(f\"AUC-ROC: {auc:.4f}\")\n",
"    else:\n",
"        print(\"AUC-ROC: N/A (single class in y_true)\")\n",
"\n",
"    print(\"\\nRegression Statistics (Non-zero values):\")\n",
"    if has_nonzero:\n",
"        _print_error_stats(regression_errors[mask_nonzero])\n",
"\n",
"    print(\"\\nFinal Prediction Statistics:\")\n",
"    _print_error_stats(final_errors)\n",
"\n",
"    # Calculate percentage of errors within thresholds\n",
"    print(\"\\nError Thresholds (Final Predictions):\")\n",
"    for threshold in thresholds:\n",
"        within_threshold = np.mean(np.abs(final_errors) <= threshold) * 100\n",
"        print(f\"Predictions within ±{threshold}: {within_threshold:.1f}%\")\n",
"\n",
"# Example usage: y_test and the raw model outputs for X_test_seq (neither is inverse-transformed)\n",
"plot_error_analysis(y_test, predictions, folder_name=folder_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26c41d23-65bf-4a38-9241-ea9b17effbd5",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0rc1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}