{ "cells": [ { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": null, "source": [ "from opt_einsum.paths import branch_1\n", "!apt-get update\n", "!apt-get install graphviz -y\n", "\n", "!pip install tensorflow\n", "!pip install numpy\n", "!pip install pandas\n", "\n", "!pip install keras\n", "!pip install scikit-learn\n", "!pip install matplotlib\n", "!pip install joblib\n", "!pip install pyarrow\n", "!pip install fastparquet\n", "!pip install scipy\n", "!pip install seaborn\n", "!pip install tqdm\n", "!pip install pydot\n", "!pip install tensorflow-io\n", "!pip install tensorflow-addons" ], "id": "5e0376433a89bbda" }, { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": null, "source": [ "import tensorflow as tf\n", "from tensorflow.keras.layers import Dense, LSTM, MultiHeadAttention, Dropout, BatchNormalization, LayerNormalization, Input, Activation, Lambda, Bidirectional, Add, MaxPooling1D, SpatialDropout1D, GlobalAveragePooling1D, \\\n", " GlobalMaxPooling1D, Concatenate\n", "from tensorflow.keras import regularizers\n", "from tensorflow.keras.models import Model\n", "import pandas as pd\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import RobustScaler\n", "from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau\n", "from tensorflow.keras.optimizers import AdamW\n", "import json\n", "from datetime import datetime\n", "import matplotlib.pyplot as plt\n", "from tensorflow.keras.utils import plot_model\n", "import tensorflow_addons as tfa\n", "import os\n", "import joblib\n", "import seaborn as sns\n", "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, confusion_matrix\n", "\n", "folder_name = datetime.now().strftime(\"%Y-%m-%d_%H-%M\")\n", "\n", "random_state_value = None" ], "id": "4e7cf95955575047" }, { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": null, "source": [ "def get_season(date):\n", " month = date.month\n", " day = date.day\n", " if (month == 12 and day >= 21) or (month <= 3 and day < 20):\n", " return 'Winter'\n", " elif (month == 3 and day >= 20) or (month <= 6 and day < 21):\n", " return 'Spring'\n", " elif (month == 6 and day >= 21) or (month <= 9 and day < 23):\n", " return 'Summer'\n", " elif (month == 9 and day >= 23) or (month <= 12 and day < 21):\n", " return 'Autumn'\n", " else:\n", " return 'Unknown'\n", "\n", "\n", "def get_time_period(hour):\n", " if 5 <= hour < 12:\n", " return 'Morning'\n", " elif 12 <= hour < 17:\n", " return 'Afternoon'\n", " elif 17 <= hour < 21:\n", " return 'Evening'\n", " else:\n", " return 'Night'\n", "\n", "\n", "def add_time_features(df):\n", " \"\"\"\n", " Add time-based features to the DataFrame.\n", " Works with both 'datetime' as column or index.\n", " \"\"\"\n", " # Se datetime è l'indice, lo usiamo direttamente\n", " if isinstance(df.index, pd.DatetimeIndex):\n", " datetime_col = df.index\n", " else:\n", " # Se datetime è una colonna, la convertiamo\n", " if 'datetime' in df.columns:\n", " datetime_col = pd.to_datetime(df['datetime'])\n", " else:\n", " raise ValueError(\"No datetime column or index found in DataFrame\")\n", "\n", " # Creazione delle feature temporali\n", " df['timestamp'] = datetime_col.astype(np.int64) // 10 ** 9\n", " df['year'] = datetime_col.year\n", " df['month'] = datetime_col.month\n", " df['day'] = datetime_col.day\n", " df['hour'] = datetime_col.hour\n", " df['minute'] = datetime_col.minute\n", " df['hour_sin'] = 
np.sin(datetime_col.hour * (2 * np.pi / 24))\n", " df['hour_cos'] = np.cos(datetime_col.hour * (2 * np.pi / 24))\n", " df['day_of_week'] = datetime_col.dayofweek\n", " df['day_of_year'] = datetime_col.dayofyear\n", " df['week_of_year'] = datetime_col.isocalendar().week.astype(int)\n", " df['quarter'] = datetime_col.quarter\n", " df['is_month_end'] = datetime_col.is_month_end.astype(int)\n", " df['is_quarter_end'] = datetime_col.is_quarter_end.astype(int)\n", " df['is_year_end'] = datetime_col.is_year_end.astype(int)\n", " df['month_sin'] = np.sin(datetime_col.month * (2 * np.pi / 12))\n", " df['month_cos'] = np.cos(datetime_col.month * (2 * np.pi / 12))\n", " df['day_of_year_sin'] = np.sin(datetime_col.dayofyear * (2 * np.pi / 365.25))\n", " df['day_of_year_cos'] = np.cos(datetime_col.dayofyear * (2 * np.pi / 365.25))\n", " df['season'] = datetime_col.map(get_season)\n", " df['time_period'] = datetime_col.hour.map(get_time_period)\n", "\n", " return df\n", "\n", "\n", "def add_solar_features(df):\n", " # Solar angle calculation\n", " df['solar_angle'] = np.sin(df['day_of_year'] * (2 * np.pi / 365.25)) * np.sin(df['hour'] * (2 * np.pi / 24))\n", "\n", " # Interactions between relevant features\n", " df['cloud_temp_interaction'] = df['cloudcover'] * df['temp']\n", " df['visibility_cloud_interaction'] = df['visibility'] * (100 - df['cloudcover'])\n", "\n", " # Derived features\n", " df['clear_sky_index'] = (100 - df['cloudcover']) / 100\n", " df['temp_gradient'] = df['temp'] - df['tempmin']\n", "\n", " return df\n", "\n", "\n", "def add_solar_specific_features(df):\n", " \"\"\"\n", " Aggiunge feature specifiche per la predizione della radiazione solare\n", " combinando caratteristiche astronomiche e meteorologiche\n", " \"\"\"\n", " # Caratteristiche astronomiche\n", " df['day_length'] = 12 + 3 * np.sin(2 * np.pi * (df['day_of_year'] - 81) / 365.25)\n", " df['solar_noon'] = 12 - df['hour']\n", " df['solar_elevation'] = np.sin(2 * np.pi * df['day_of_year'] / 365.25) * np.cos(2 * np.pi * df['solar_noon'] / 24)\n", "\n", " # Angolo solare teorico\n", " df['solar_angle'] = np.sin(df['hour_sin']) * np.sin(df['day_of_year_sin'])\n", "\n", " # Interazioni con condizioni atmosferiche\n", " df['cloud_elevation'] = df['cloudcover'] * df['solar_elevation']\n", " df['visibility_elevation'] = df['visibility'] * df['solar_elevation']\n", " df['uv_cloud_interaction'] = df['uvindex'] * (100 - df['cloudcover']) / 100\n", "\n", " # Indici di chiarezza e trasmissione\n", " df['clearness_index'] = (100 - df['cloudcover']) * df['visibility'] / 10000\n", " df['atmospheric_attenuation'] = (df['pressure'] / 1013.25) * (1 - (df['humidity'] / 100) * 0.6)\n", "\n", " # Radiazione teorica e attenuazione\n", " df['theoretical_radiation'] = df['solar_angle'].clip(0, 1) * 1000\n", " df['expected_radiation'] = df['theoretical_radiation'] * df['clearness_index']\n", "\n", " # Rolling features\n", " df['cloud_rolling_12h'] = df['cloudcover'].rolling(window=12).mean()\n", " df['temp_rolling_12h'] = df['temp'].rolling(window=12).mean()\n", " df['uv_rolling_12h'] = df['uvindex'].rolling(window=12).mean()\n", "\n", " # Interazioni temperatura-radiazione\n", " df['temp_radiation_potential'] = df['temp'] * df['solar_elevation']\n", "\n", " return df\n", "\n", "\n", "def add_radiation_energy_features(df):\n", " \"\"\"Adds specific features based on solarenergy and uvindex\"\"\"\n", "\n", " # Assicuriamoci che l'indice sia di tipo datetime\n", " if not isinstance(df.index, pd.DatetimeIndex):\n", " df.index = 
pd.to_datetime(df['datetime'])\n", "\n", " # Solar energy to UV ratio (independent from solarradiation)\n", " df['energy_uv_ratio'] = df['solarenergy'] / (df['uvindex'] + 1e-6)\n", "\n", " # Time aggregations\n", " # Moving averages\n", " windows = [3, 6, 12, 24] # hours\n", " for w in windows:\n", " df[f'energy_rolling_mean_{w}h'] = df['solarenergy'].rolling(window=w).mean()\n", " df[f'uv_rolling_mean_{w}h'] = df['uvindex'].rolling(window=w).mean()\n", "\n", " # Daily aggregations utilizzando datetime\n", " df['energy_daily_sum'] = df.groupby(df.index.date)['solarenergy'].transform('sum')\n", " df['uv_daily_max'] = df.groupby(df.index.date)['uvindex'].transform('max')\n", "\n", " # Changes\n", " df['energy_change'] = df['solarenergy'].diff()\n", " df['uv_change'] = df['uvindex'].diff()\n", "\n", " # Lag features\n", " lags = [1, 2, 3, 6, 12, 24] # hours\n", " for lag in lags:\n", " df[f'energy_lag_{lag}h'] = df['solarenergy'].shift(lag)\n", " df[f'uv_lag_{lag}h'] = df['uvindex'].shift(lag)\n", "\n", " # Peak indicators\n", " df['is_energy_peak'] = (df['solarenergy'] > df['energy_rolling_mean_6h'] * 1.2).astype(int)\n", " df['is_uv_peak'] = (df['uvindex'] > df['uv_rolling_mean_6h'] * 1.2).astype(int)\n", "\n", " # Aggiungiamo alcune metriche di volatilità\n", " df['energy_volatility'] = df['energy_change'].rolling(window=24).std()\n", " df['uv_volatility'] = df['uv_change'].rolling(window=24).std()\n", "\n", " # Indice di intensità solare composito\n", " df['solar_intensity_index'] = (df['solarenergy'] * df['uvindex']) / (df['cloudcover'] + 1e-6)\n", "\n", " # Interazioni\n", " df['uv_cloud_interaction'] = df['uvindex'] * (100 - df['cloudcover']) / 100\n", " df['energy_temp_interaction'] = df['solarenergy'] * df['temp']\n", "\n", " return df\n", "\n", "\n", "def add_advanced_features(df):\n", " \"\"\"\n", " Add all advanced features to the DataFrame\n", " Assumes df has a DatetimeIndex\n", " \"\"\"\n", " # Verifichiamo che abbiamo un DatetimeIndex\n", " if not isinstance(df.index, pd.DatetimeIndex):\n", " raise ValueError(\"DataFrame must have a DatetimeIndex\")\n", "\n", " # Existing features\n", " df = add_time_features(df)\n", " df = add_solar_features(df)\n", " df = add_solar_specific_features(df)\n", " df = add_radiation_energy_features(df)\n", "\n", " # Weather variable interactions\n", " df['temp_humidity'] = df['temp'] * df['humidity']\n", " df['temp_cloudcover'] = df['temp'] * df['cloudcover']\n", " df['visibility_cloudcover'] = df['visibility'] * df['cloudcover']\n", "\n", " # Derived features\n", " df['clear_sky_factor'] = (100 - df['cloudcover']) / 100\n", " df['temp_humidity_interaction'] = df['temp'] * df['humidity'] / 100\n", " df['atmospheric_transparency'] = (100 - df['cloudcover']) * (df['visibility'] / 10)\n", "\n", " # Rolling means\n", " df['temp_rolling_mean_6h'] = df['temp'].rolling(window=6).mean()\n", " df['cloudcover_rolling_mean_6h'] = df['cloudcover'].rolling(window=6).mean()\n", "\n", " # Lag features\n", " df['temp_1h_lag'] = df['temp'].shift(1)\n", " df['cloudcover_1h_lag'] = df['cloudcover'].shift(1)\n", " df['humidity_1h_lag'] = df['humidity'].shift(1)\n", "\n", " # Extreme conditions indicator\n", " df['extreme_conditions'] = ((df['temp'] > df['temp'].quantile(0.75)) &\n", " (df['humidity'] < df['humidity'].quantile(0.25))).astype(int)\n", "\n", " # One-hot encoding for categorical features\n", " df = pd.get_dummies(df, columns=['season', 'time_period'])\n", "\n", " return df\n", "\n", "\n", "def prepare_advanced_data(df):\n", " \"\"\"\n", " Prepare data 
for advanced modeling with proper datetime handling\n", " \"\"\"\n", " # Assicuriamoci che abbiamo una copia del DataFrame\n", " df = df.copy()\n", "\n", " # Verifichiamo se datetime è già l'indice\n", " if not isinstance(df.index, pd.DatetimeIndex):\n", " if 'datetime' in df.columns:\n", " df['datetime'] = pd.to_datetime(df['datetime'])\n", " df.set_index('datetime', inplace=True)\n", " else:\n", " raise ValueError(\"No datetime column or index found in DataFrame\")\n", "\n", " # Ordiniamo il DataFrame per datetime\n", " df = df.sort_index()\n", "\n", " # Apply feature engineering functions\n", " df = add_advanced_features(df)\n", "\n", " #all_columns = list(df.columns)\n", " #print(all_columns)\n", "\n", " features = {\n", " # Primary Features (strong direct correlation)\n", " 'primary_features': [\n", " 'uvindex', # Direct radiation indicator\n", " 'cloudcover', # Cloud coverage\n", " 'visibility', # Atmospheric transparency\n", " 'temp', # Temperature\n", " 'pressure', # Atmospheric pressure\n", " 'humidity', # Humidity\n", " ],\n", "\n", " # Astronomical and Temporal Features\n", " 'astronomical_features': [\n", " 'solar_elevation', # Solar elevation\n", " 'solar_angle', # Solar angle\n", " 'day_length', # Day length\n", " 'hour_sin', # Daily cycle\n", " 'hour_cos',\n", " 'day_of_year_sin', # Annual cycle\n", " 'day_of_year_cos',\n", " 'month_sin', # Monthly cycle\n", " 'month_cos',\n", " ],\n", "\n", " # Key Indices and Interactions\n", " 'key_interactions': [\n", " 'clear_sky_index', # Clear sky index\n", " 'atmospheric_attenuation', # Atmospheric attenuation\n", " 'theoretical_radiation', # Theoretical radiation\n", " 'expected_radiation', # Expected radiation\n", " 'cloud_elevation', # Cloud-elevation interaction\n", " 'visibility_elevation', # Visibility-elevation interaction\n", " 'uv_cloud_interaction', # UV-cloud interaction\n", " 'temp_radiation_potential', # Temperature-radiation potential\n", " ],\n", "\n", " # Rolling Features (temporal trends)\n", " 'rolling_features': [\n", " 'cloud_rolling_12h', # Cloud coverage moving average\n", " 'temp_rolling_12h', # Temperature moving average\n", " 'uv_rolling_12h', # UV moving average\n", " 'cloudcover_rolling_mean_6h',\n", " 'temp_rolling_mean_6h',\n", " ],\n", "\n", " # Lag Features (most recent)\n", " 'lag_features': [\n", " 'temp_1h_lag', # 1-hour temperature lag\n", " 'cloudcover_1h_lag', # 1-hour cloud coverage lag\n", " 'humidity_1h_lag', # 1-hour humidity lag\n", " 'uv_lag_1h', # 1-hour UV lag\n", " ],\n", "\n", " # Categorical Features\n", " 'categorical_features': [\n", " 'season_Spring', # Seasons\n", " 'season_Summer',\n", " 'season_Autumn',\n", " 'season_Winter',\n", " 'time_period_Morning', # Time periods\n", " 'time_period_Afternoon',\n", " 'time_period_Evening',\n", " 'time_period_Night',\n", " ]\n", " }\n", "\n", " final_features = [feature for group in features.values() for feature in group]\n", "\n", " # Handle missing values\n", " target_variables = ['solarradiation', 'solarenergy', 'uvindex']\n", " for column in final_features + target_variables:\n", " if column in df.columns:\n", " df[column] = df[column].interpolate(method='time')\n", " df.fillna(0, inplace=True)\n", "\n", " # Temporal split\n", " data_after_2010 = df[df['year'] >= 2010].copy()\n", " data_before_2010 = df[df['year'] < 2010].copy()\n", "\n", " X = data_after_2010[final_features]\n", " y = data_after_2010['solarradiation']\n", " X_to_predict = data_before_2010[final_features]\n", "\n", " # Train-test split\n", " X_train, X_test, y_train, 
y_test = train_test_split(\n", " X, y, test_size=0.2, random_state=42, shuffle=False\n", " )\n", "\n", " # Scaling\n", " feature_scaler = RobustScaler()\n", " X_train_scaled = feature_scaler.fit_transform(X_train)\n", " X_test_scaled = feature_scaler.transform(X_test)\n", " X_to_predict_scaled = feature_scaler.transform(X_to_predict)\n", "\n", " target_scaler = RobustScaler()\n", " y_train_scaled = target_scaler.fit_transform(y_train.values.reshape(-1, 1))\n", " y_test_scaled = target_scaler.transform(y_test.values.reshape(-1, 1))\n", "\n", " # Print info about selected features\n", " print(\"\\nSelected features:\")\n", " print(f\"Number of features: {len(final_features)}\")\n", " print(\"Features list:\", final_features)\n", "\n", " return X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled, feature_scaler, target_scaler, final_features, X_to_predict_scaled\n", "\n", "\n", "def create_sequence_data(X, sequence_length=24):\n", " \"\"\"\n", " Converts data into sequences for LSTM input\n", " sequence_length represents how many previous hours to consider\n", " \"\"\"\n", " sequences = []\n", " for i in range(len(X) - sequence_length + 1):\n", " sequences.append(X[i:i + sequence_length])\n", " return np.array(sequences)\n", "\n", "\n", "def prepare_hybrid_data(df):\n", " X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled, feature_scaler, target_scaler, features, X_to_predict_scaled = prepare_advanced_data(df)\n", "\n", " # Convert data into sequences\n", " sequence_length = 24 # 24 hours of historical data\n", "\n", " X_train_seq = create_sequence_data(X_train_scaled, sequence_length)\n", " X_test_seq = create_sequence_data(X_test_scaled, sequence_length)\n", "\n", " # Adjust y by removing the first (sequence_length-1) elements\n", " y_train = y_train_scaled[sequence_length - 1:]\n", " y_test = y_test_scaled[sequence_length - 1:]\n", "\n", " X_to_predict_seq = create_sequence_data(X_to_predict_scaled, sequence_length)\n", "\n", " return X_train_seq, X_test_seq, y_train, y_test, feature_scaler, target_scaler, features, X_to_predict_seq" ], "id": "1f7b15beaf12c0eb" }, { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": null, "source": [ "def create_residual_lstm_layer(x, units, dropout_rate, l2_reg=0.01, return_sequences=True, survival_probability=0.8):\n", " \"\"\"\n", " Creates a bidirectional LSTM layer with residual connections and regularization.\n", "\n", " Parameters:\n", " x: Input tensor\n", " units: Number of LSTM units\n", " dropout_rate: Dropout rate for regularization\n", " l2_reg: L2 regularization factor\n", " return_sequences: Whether to return sequences or just the last output\n", " survival_probability: Probability of layer survival for stochastic depth\n", " \"\"\"\n", " residual = x\n", " x = Bidirectional(LSTM(units, return_sequences=return_sequences, kernel_regularizer=regularizers.l2(l2_reg)))(x)\n", " x = LayerNormalization()(x)\n", " x = Dropout(dropout_rate)(x)\n", "\n", " if return_sequences:\n", " if int(residual.shape[-1]) != 2 * units:\n", " residual = Dense(2 * units, activation='linear')(residual)\n", " x = tfa.layers.StochasticDepth(survival_probability)([x, residual])\n", " return x\n", "\n", "\n", "def attention_block(x, units, num_heads=8, survival_probability=0.8):\n", " \"\"\"\n", " Creates a multi-head attention block with residual connections.\n", "\n", " Parameters:\n", " x: Input tensor\n", " units: Dimension of the key space\n", " num_heads: Number of attention heads\n", " survival_probability: Probability of 
layer survival for stochastic depth\n", " \"\"\"\n", " attention = MultiHeadAttention(num_heads=num_heads, key_dim=units)(x, x)\n", " x = tfa.layers.StochasticDepth(survival_probability)([x, attention])\n", " x = LayerNormalization()(x)\n", " return x\n", "\n", "\n", "def create_solarradiation_model(input_shape, folder_name, l2_lambda=0.005, min_output=0, max_output=1):\n", " \"\"\"\n", " Creates a deep learning model for solar radiation prediction using LSTM and attention mechanisms.\n", "\n", " Parameters:\n", " input_shape: Shape of input data\n", " folder_name: Directory to save model architecture visualization\n", " l2_lambda: L2 regularization factor\n", " \"\"\"\n", " inputs = Input(shape=input_shape)\n", "\n", " # Progressive hyperparameters for model architecture\n", " survival_probs = [0.9, 0.8, 0.7, 0.6] # Decreasing survival probabilities for deeper layers\n", " attention_survival_probs = [0.85, 0.75, 0.65, 0.55] # Survival probabilities for attention blocks\n", " lstm_units = [256, 128, 64, 32] # Decreasing number of units for LSTM layers\n", " dropout_rates = [0.4, 0.3, 0.2, 0.2] # Decreasing dropout rates\n", " attention_heads = [32, 24, 16, 8] # Decreasing number of attention heads\n", "\n", " lstm_blocks = 4\n", " # Main network architecture\n", " x = inputs\n", " for i in range(lstm_blocks):\n", " # LSTM layer with residual connections\n", " x = create_residual_lstm_layer(\n", " x,\n", " units=lstm_units[i],\n", " dropout_rate=dropout_rates[i],\n", " l2_reg=l2_lambda,\n", " return_sequences=True,\n", " survival_probability=survival_probs[i]\n", " )\n", " # Attention block\n", " x = attention_block(\n", " x,\n", " units=lstm_units[i],\n", " num_heads=attention_heads[i],\n", " survival_probability=attention_survival_probs[i]\n", " )\n", " if i < lstm_blocks - 1: # No pooling after last LSTM layer\n", " x = MaxPooling1D()(x)\n", "\n", " # Final LSTM layer for sequence aggregation\n", " x = create_residual_lstm_layer(\n", " x,\n", " units=32,\n", " dropout_rate=0.1,\n", " l2_reg=l2_lambda,\n", " return_sequences=False,\n", " survival_probability=0.6\n", " )\n", "\n", " # Dense layers for final prediction\n", " dense_units = [128, 64, 32]\n", " dense_dropout = [0.2, 0.1, 0.05]\n", "\n", " for units, dropout in zip(dense_units, dense_dropout):\n", " x = Dense(units, kernel_regularizer=regularizers.l2(l2_lambda))(x)\n", " x = BatchNormalization()(x)\n", " x = Activation('swish')(x)\n", " x = Dropout(dropout)(x)\n", "\n", " # Output layer with value clipping\n", " outputs = Dense(1)(x)\n", " outputs = Lambda(lambda x: tf.clip_by_value(x, min_output, max_output))(outputs)\n", "\n", " # Model compilation\n", " model = Model(inputs=inputs, outputs=outputs, name=\"SolarRadiationModel\")\n", "\n", " # Improved loss function\n", " def hybrid_focal_loss(y_true, y_pred):\n", " # MSE with focal weighting\n", " mse = tf.square(y_true - y_pred)\n", " error_ratio = tf.abs(y_true - y_pred) / (tf.abs(y_true) + 1.0)\n", " focal_weight = tf.pow(error_ratio, 2)\n", " weighted_mse = focal_weight * mse\n", "\n", " # MAE component\n", " mae = tf.abs(y_true - y_pred)\n", "\n", " return tf.reduce_mean(0.7 * weighted_mse + 0.3 * mae)\n", "\n", " # Custom metrics\n", " def rmse(y_true, y_pred):\n", " return tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))\n", "\n", " def custom_mape(y_true, y_pred):\n", " epsilon = 1e-7\n", " diff = tf.abs((y_true - y_pred) / (y_true + epsilon))\n", " diff = tf.clip_by_value(diff, 0, 1)\n", " return tf.reduce_mean(diff) * 100\n", "\n", " # Learning rate schedule\n", 
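 " # NOTE: CosineDecayRestarts anneals the learning rate from 3e-4 along a cosine curve over the first 1000 optimizer steps,\n",
 " # then restarts; each cycle is twice as long (t_mul=2.0), each restart peaks 10% lower (m_mul=0.9), and the floor is\n",
 " # alpha * initial_learning_rate = 3e-9. Because the learning rate is a schedule object, the ReduceLROnPlateau callback\n",
 " # used in train_hybrid_model generally cannot override it (and may error if it ever triggers).\n",
 " # A hypothetical way to inspect the decay curve outside this function:\n",
 " #   probe = tf.keras.optimizers.schedules.CosineDecayRestarts(3e-4, 1000, t_mul=2.0, m_mul=0.9, alpha=1e-5)\n",
 " #   print([float(probe(step)) for step in (0, 250, 500, 999, 1000, 1500, 2999)])\n",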
" lr_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(\n", " initial_learning_rate=0.0003,\n", " first_decay_steps=1000,\n", " t_mul=2.0,\n", " m_mul=0.9,\n", " alpha=1e-5\n", " )\n", "\n", " # Optimizer\n", " optimizer = AdamW(\n", " learning_rate=lr_schedule,\n", " beta_1=0.9,\n", " beta_2=0.999,\n", " epsilon=1e-7,\n", " weight_decay=0.001,\n", " amsgrad=True\n", " )\n", "\n", " model.compile(\n", " optimizer=optimizer,\n", " loss=hybrid_focal_loss,\n", " metrics=['mse', 'mae', rmse, custom_mape]\n", " )\n", "\n", " model.summary()\n", "\n", " plot_model(model,\n", " to_file=f'{folder_name}_model_architecture.png',\n", " show_shapes=True,\n", " show_layer_names=True,\n", " dpi=150,\n", " show_layer_activations=True)\n", "\n", " return model\n", "\n", "\n", "def evaluate_solarradiation_predictions(y_true, y_pred, hour=None, folder_name=None):\n", " \"\"\"\n", " Comprehensive evaluation of solar radiation predictions with detailed analysis and visualizations.\n", "\n", " Parameters:\n", " -----------\n", " y_true : array-like\n", " Actual solar radiation values (W/m²)\n", " y_pred : array-like\n", " Predicted solar radiation values (W/m²)\n", " hour : array-like, optional\n", " Array of hours corresponding to predictions, for temporal analysis\n", " folder_name : str, optional\n", " Directory to save analysis plots\n", "\n", " Returns:\n", " --------\n", " dict\n", " Dictionary containing all calculated metrics\n", " \"\"\"\n", "\n", " # Data preparation\n", " y_true = np.array(y_true).ravel()\n", " y_pred = np.array(y_pred).ravel()\n", " errors = y_pred - y_true\n", "\n", " # Basic metrics calculation\n", " mae_raw = mean_absolute_error(y_true, y_pred)\n", " rmse_raw = np.sqrt(mean_squared_error(y_true, y_pred))\n", " r2_raw = r2_score(y_true, y_pred)\n", " mape = np.mean(np.abs((y_true - y_pred) / (y_true + 1e-7))) * 100\n", "\n", " # Error margin accuracy\n", " within_5_percent = np.mean(np.abs((y_pred - y_true) / (y_true + 1e-7)) <= 0.05)\n", " within_10_percent = np.mean(np.abs((y_pred - y_true) / (y_true + 1e-7)) <= 0.10)\n", " within_20_percent = np.mean(np.abs((y_pred - y_true) / (y_true + 1e-7)) <= 0.20)\n", "\n", " # Radiation level classification\n", " def get_radiation_level(value):\n", " if value <= 200:\n", " return 'Very Low'\n", " elif value <= 400:\n", " return 'Low'\n", " elif value <= 600:\n", " return 'Moderate'\n", " elif value <= 800:\n", " return 'High'\n", " elif value <= 1000:\n", " return 'Very High'\n", " else:\n", " return 'Extreme'\n", "\n", " # Calculate radiation levels\n", " y_true_levels = [get_radiation_level(v) for v in y_true]\n", " y_pred_levels = [get_radiation_level(v) for v in y_pred]\n", " level_accuracy = np.mean([t == p for t, p in zip(y_true_levels, y_pred_levels)])\n", "\n", " # Print main metrics\n", " print(\"\\nSolar Radiation Prediction Metrics:\")\n", " print(\"\\nAbsolute Metrics:\")\n", " print(f\"MAE: {mae_raw:.2f} W/m²\")\n", " print(f\"RMSE: {rmse_raw:.2f} W/m²\")\n", " print(f\"R² Score: {r2_raw:.3f}\")\n", " print(f\"MAPE: {mape:.2f}%\")\n", "\n", " print(\"\\nPercentage Accuracy:\")\n", " print(f\"Within ±5%: {within_5_percent * 100:.1f}%\")\n", " print(f\"Within ±10%: {within_10_percent * 100:.1f}%\")\n", " print(f\"Within ±20%: {within_20_percent * 100:.1f}%\")\n", "\n", " print(\"\\nLevel Accuracy:\")\n", " print(f\"Level Accuracy: {level_accuracy * 100:.1f}%\")\n", "\n", " # Confusion matrix for radiation levels\n", " cm = confusion_matrix(y_true_levels, y_pred_levels)\n", " print(\"\\nConfusion Matrix for Radiation 
Levels:\")\n", " cm_df = pd.DataFrame(\n", " cm,\n", " columns=['Very Low', 'Low', 'Moderate', 'High', 'Very High', 'Extreme'],\n", " index=['Very Low', 'Low', 'Moderate', 'High', 'Very High', 'Extreme']\n", " )\n", " print(cm_df)\n", "\n", " # Time period analysis\n", " if hour is not None:\n", " day_periods = {\n", " 'Morning (5-11)': (5, 11),\n", " 'Noon (11-13)': (11, 13),\n", " 'Afternoon (13-17)': (13, 17),\n", " 'Evening (17-21)': (17, 21),\n", " 'Night (21-5)': (21, 5)\n", " }\n", "\n", " print(\"\\nAnalysis by Time Period:\")\n", " for period, (start, end) in day_periods.items():\n", " if start < end:\n", " mask = (hour >= start) & (hour < end)\n", " else:\n", " mask = (hour >= start) | (hour < end)\n", "\n", " if np.any(mask):\n", " period_mae = mean_absolute_error(y_true[mask], y_pred[mask])\n", " period_mape = np.mean(np.abs((y_true[mask] - y_pred[mask]) / (y_true[mask] + 1e-7))) * 100\n", " print(f\"\\n{period}:\")\n", " print(f\"MAE: {period_mae:.2f} W/m²\")\n", " print(f\"MAPE: {period_mape:.2f}%\")\n", "\n", " # Visualizations\n", " if folder_name is not None:\n", " try:\n", "\n", " # Figure 1: Main analysis plots\n", " plt.figure(figsize=(20, 15))\n", "\n", " # Plot 1: Scatter plot of actual vs predicted values\n", " plt.subplot(3, 2, 1)\n", " plt.scatter(y_true, y_pred, alpha=0.5)\n", " plt.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'r--', lw=2)\n", " plt.xlabel('Actual Radiation (W/m²)')\n", " plt.ylabel('Predicted Radiation (W/m²)')\n", " plt.title('Actual vs Predicted Values')\n", " plt.grid(True)\n", "\n", " # Plot 2: Absolute error distribution\n", " plt.subplot(3, 2, 2)\n", " plt.hist(errors, bins=50, alpha=0.7)\n", " plt.xlabel('Prediction Error (W/m²)')\n", " plt.ylabel('Frequency')\n", " plt.title('Error Distribution')\n", " plt.grid(True)\n", "\n", " # Plot 3: Percentage error distribution\n", " plt.subplot(3, 2, 3)\n", " percentage_errors = ((y_pred - y_true) / (y_true + 1e-7)) * 100\n", " plt.hist(np.clip(percentage_errors, -100, 100), bins=50, alpha=0.7)\n", " plt.xlabel('Percentage Error (%)')\n", " plt.ylabel('Frequency')\n", " plt.title('Percentage Error Distribution')\n", " plt.grid(True)\n", "\n", " # Plot 4: Errors vs actual values\n", " plt.subplot(3, 2, 4)\n", " plt.scatter(y_true, errors, alpha=0.5)\n", " plt.axhline(y=0, color='r', linestyle='--')\n", " plt.xlabel('Actual Radiation (W/m²)')\n", " plt.ylabel('Error (W/m²)')\n", " plt.title('Errors vs Actual Values')\n", " plt.grid(True)\n", "\n", " # Plot 5: Error boxplot by radiation level\n", " plt.subplot(3, 2, 5)\n", " sns.boxplot(x=[get_radiation_level(v) for v in y_true], y=errors)\n", " plt.xticks(rotation=45)\n", " plt.xlabel('Radiation Level')\n", " plt.ylabel('Error (W/m²)')\n", " plt.title('Error Distribution by Level')\n", "\n", " # Plot 6: Confusion matrix heatmap\n", " plt.subplot(3, 2, 6)\n", " sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')\n", " plt.title('Confusion Matrix')\n", " plt.xticks(rotation=45)\n", " plt.yticks(rotation=45)\n", "\n", " plt.tight_layout()\n", " filename = f'{folder_name}_radiation_analysis.png'\n", " plt.savefig(filename, dpi=300, bbox_inches='tight')\n", " print(f\"\\nPlot saved as: {filename}\")\n", " plt.close()\n", "\n", " except Exception as e:\n", " print(f\"\\nError saving plots: {str(e)}\")\n", "\n", " # Additional error statistics\n", " print(\"\\nError Statistics:\")\n", " print(f\"Mean error: {np.mean(errors):.3f}\")\n", " print(f\"Error standard deviation: {np.std(errors):.3f}\")\n", " print(f\"Median error: 
{np.median(errors):.3f}\")\n", " print(f\"95th percentile absolute error: {np.percentile(np.abs(errors), 95):.3f}\")\n", "\n", " # Return structured metrics\n", " metrics = {\n", " 'absolute': {\n", " 'mae': mae_raw,\n", " 'rmse': rmse_raw,\n", " 'r2': r2_raw,\n", " 'mape': mape\n", " },\n", " 'percentage_accuracy': {\n", " 'within_5_percent': within_5_percent,\n", " 'within_10_percent': within_10_percent,\n", " 'within_20_percent': within_20_percent\n", " },\n", " 'categorical': {\n", " 'level_accuracy': level_accuracy\n", " },\n", " 'error_stats': {\n", " 'mean': float(np.mean(errors)),\n", " 'std': float(np.std(errors)),\n", " 'median': float(np.median(errors)),\n", " 'p95_abs': float(np.percentile(np.abs(errors), 95))\n", " }\n", " }\n", "\n", " return metrics\n", "\n", "\n", "def plot_training_history(history, folder_name=None):\n", " \"\"\"\n", " Visualize and save training loss and metrics plots\n", "\n", " Parameters:\n", " -----------\n", " history : tensorflow.keras.callbacks.History\n", " History object returned by model training\n", " folder_name : str\n", " Directory to save the plots and metrics\n", " \"\"\"\n", "\n", " try:\n", " # Create figure\n", " plt.figure(figsize=(12, 4))\n", "\n", " # Loss plot\n", " plt.subplot(1, 2, 1)\n", " plt.plot(history.history['loss'], label='Training Loss')\n", " plt.plot(history.history['val_loss'], label='Validation Loss')\n", " plt.title('Model Loss')\n", " plt.xlabel('Epoch')\n", " plt.ylabel('Loss')\n", " plt.legend()\n", " plt.grid(True)\n", "\n", " # MAE plot\n", " plt.subplot(1, 2, 2)\n", " plt.plot(history.history['mae'], label='Training MAE')\n", " plt.plot(history.history['val_mae'], label='Validation MAE')\n", " plt.title('Model MAE')\n", " plt.xlabel('Epoch')\n", " plt.ylabel('MAE')\n", " plt.legend()\n", " plt.grid(True)\n", "\n", " plt.tight_layout()\n", "\n", " if folder_name is not None:\n", " # Generate filename with timestamp\n", " filename = f'{folder_name}_training_history.png' # Rimossa parentesi extra\n", "\n", " # Save figure\n", " plt.savefig(filename, dpi=300, bbox_inches='tight')\n", " print(f\"\\nTraining history plot saved as: {filename}\")\n", "\n", " # Save numerical data in CSV format\n", " history_df = pd.DataFrame({\n", " 'epoch': range(1, len(history.history['loss']) + 1),\n", " 'training_loss': history.history['loss'],\n", " 'validation_loss': history.history['val_loss'],\n", " 'training_mae': history.history['mae'],\n", " 'validation_mae': history.history['val_mae']})\n", "\n", " if folder_name is not None:\n", " csv_filename = f'{folder_name}_training_history.csv' # Rimossa parentesi extra\n", " history_df.to_csv(csv_filename, index=False)\n", " print(f\"Training history data saved as: {csv_filename}\")\n", "\n", " # Calculate and save final statistics\n", " final_stats = {\n", " 'final_training_loss': history.history['loss'][-1],\n", " 'final_validation_loss': history.history['val_loss'][-1],\n", " 'final_training_mae': history.history['mae'][-1],\n", " 'final_validation_mae': history.history['val_mae'][-1],\n", " 'best_validation_loss': min(history.history['val_loss']),\n", " 'best_validation_mae': min(history.history['val_mae']),\n", " 'epochs': len(history.history['loss']),\n", " }\n", "\n", " if folder_name is not None:\n", " # Save statistics in JSON format\n", " stats_filename = f'{folder_name}_training_stats.json' # Rimossa parentesi extra\n", " with open(stats_filename, 'w') as f:\n", " json.dump(final_stats, f, indent=4)\n", " print(f\"Final statistics saved as: {stats_filename}\")\n", "\n", " 
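# NOTE: the keys read in this function ('loss', 'val_loss', 'mae', 'val_mae') exist because the model is compiled\n",
 " # with metrics=['mse', 'mae', rmse, custom_mape]; if that metrics list changes, update the keys used here and in\n",
 " # the CSV/JSON export above. A hypothetical sanity check before indexing: print(sorted(history.history.keys()))\n",
 " 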
# Print main statistics\n", " print(\"\\nFinal Training Statistics:\")\n", " print(f\"Final Loss (train/val): {final_stats['final_training_loss']:.4f}/{final_stats['final_validation_loss']:.4f}\")\n", " print(f\"Final MAE (train/val): {final_stats['final_training_mae']:.4f}/{final_stats['final_validation_mae']:.4f}\")\n", " print(f\"Best validation loss: {final_stats['best_validation_loss']:.4f}\")\n", " print(f\"Best validation MAE: {final_stats['best_validation_mae']:.4f}\")\n", "\n", " plt.show()\n", "\n", " except Exception as e:\n", " print(f\"\\nError during plot creation or saving: {str(e)}\")\n", "\n", "\n", "def train_hybrid_model(model, X_train, y_train, X_test, y_test, epochs=100, batch_size=32, folder_name='solarradiation'):\n", " \"\"\"\n", " Advanced training function for the hybrid solar radiation model\n", " \"\"\"\n", "\n", " def calculate_metrics(y_true, y_pred):\n", " \"\"\"Helper function to calculate metrics safely\"\"\"\n", " y_true = np.array(y_true).flatten()\n", " y_pred = np.array(y_pred).flatten()\n", "\n", " # Count out of range predictions\n", " out_of_range = np.sum((y_pred < 0) | (y_pred > 1500))\n", "\n", " # Calculate MAPE with clipping to avoid extreme values\n", " diff = np.abs((y_true - y_pred) / (y_true + 1e-7))\n", " diff = np.clip(diff, 0, 1) # Clip to maximum 100% error\n", " mape = np.mean(diff) * 100\n", "\n", " # Calculate accuracy within 10%\n", " within_10_percent = np.mean(diff <= 0.10) * 100\n", "\n", " # Calculate MAE and RMSE\n", " mae = np.mean(np.abs(y_true - y_pred))\n", " rmse = np.sqrt(np.mean(np.square(y_true - y_pred)))\n", "\n", " return out_of_range, mape, within_10_percent, mae, rmse\n", "\n", " callbacks = [\n", " EarlyStopping(\n", " monitor='val_loss',\n", " patience=15,\n", " restore_best_weights=True,\n", " mode='min',\n", " verbose=1,\n", " min_delta=1e-4\n", " ),\n", " ReduceLROnPlateau(\n", " monitor='val_loss',\n", " factor=0.2,\n", " patience=5,\n", " verbose=1,\n", " mode='min',\n", " min_delta=1e-4,\n", " cooldown=3,\n", " min_lr=1e-7\n", " ),\n", " tf.keras.callbacks.ModelCheckpoint(\n", " filepath=f'{folder_name}_best_model.h5',\n", " monitor='val_loss',\n", " save_best_only=True,\n", " mode='min',\n", " save_weights_only=False\n", " ),\n", " tf.keras.callbacks.TensorBoard(\n", " log_dir=f'./{folder_name}_logs',\n", " histogram_freq=1,\n", " write_graph=True,\n", " update_freq='epoch'\n", " ),\n", " tf.keras.callbacks.LambdaCallback(\n", " on_epoch_end=lambda epoch, logs: (\n", " print(f\"\\nEpoch {epoch + 1} Detailed Metrics:\") and\n", " (lambda: (\n", " y_pred := model.predict(X_test, verbose=0),\n", " metrics := calculate_metrics(y_test, y_pred),\n", " print(f\"Out of range: {metrics[0]} predictions\"),\n", " print(f\"MAPE: {metrics[1]:.2f}%\"),\n", " print(f\"Within ±10%: {metrics[2]:.2f}%\"),\n", " print(f\"MAE: {metrics[3]:.2f}\"),\n", " print(f\"RMSE: {metrics[4]:.2f}\")\n", " ))()\n", " ) if epoch % 5 == 0 else None\n", " )\n", " ]\n", "\n", " try:\n", " history = model.fit(\n", " X_train, y_train,\n", " validation_data=(X_test, y_test),\n", " epochs=epochs,\n", " batch_size=batch_size,\n", " callbacks=callbacks,\n", " verbose=1,\n", " shuffle=False\n", " )\n", "\n", " print(\"\\nTraining completed successfully!\")\n", "\n", " # Final evaluation\n", " final_pred = model.predict(X_test, verbose=0)\n", " metrics = calculate_metrics(y_test, final_pred)\n", "\n", " print(\"\\nFinal Model Performance:\")\n", " print(f\"Out of range predictions: {metrics[0]} ({metrics[0] / len(y_test) * 100:.2f}%)\")\n", " 
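# NOTE: y_test and final_pred are still in RobustScaler space here, so the MAE/RMSE reported below (and the 0-1500\n",
 " # out-of-range check in calculate_metrics) are in scaled units rather than W/m². A hypothetical conversion, assuming\n",
 " # the fitted target scaler is available in this scope:\n",
 " #   final_pred_wm2 = target_scaler.inverse_transform(np.asarray(final_pred).reshape(-1, 1))\n",
 " #   y_test_wm2 = target_scaler.inverse_transform(np.asarray(y_test).reshape(-1, 1))\n",
 " 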
print(f\"MAPE: {metrics[1]:.2f}%\")\n", " print(f\"Predictions within ±10%: {metrics[2]:.2f}%\")\n", " print(f\"MAE: {metrics[3]:.2f}\")\n", " print(f\"RMSE: {metrics[4]:.2f}\")\n", "\n", " plot_training_history(history, folder_name=folder_name)\n", "\n", " return history\n", "\n", " except Exception as e:\n", " print(f\"\\nError during training: {str(e)}\")\n", " raise\n", "\n", " finally:\n", " tf.keras.backend.clear_session()\n", "\n", "\n", "def integrate_predictions(df, predictions, sequence_length=24):\n", " \"\"\"\n", " Integrates solar radiation predictions into the original dataset for pre-2010 data.\n", "\n", " Parameters:\n", " -----------\n", " df : pandas.DataFrame\n", " Original dataset\n", " predictions : numpy.ndarray\n", " Array of solar radiation predictions\n", " sequence_length : int\n", " Sequence length used for predictions\n", "\n", " Returns:\n", " --------\n", " pandas.DataFrame\n", " Updated dataset with solar radiation predictions\n", " \"\"\"\n", " # Convert datetime to datetime format if not already\n", " df['datetime'] = pd.to_datetime(df['datetime'])\n", "\n", " # Identify pre-2010 rows\n", " mask_pre_2010 = df['datetime'].dt.year < 2010\n", "\n", " # Create temporary DataFrame with predictions\n", " dates_pre_2010 = df[mask_pre_2010]['datetime'].iloc[sequence_length - 1:]\n", " predictions_df = pd.DataFrame({\n", " 'datetime': dates_pre_2010,\n", " 'solarradiation_predicted': predictions.flatten()})\n", "\n", " # Merge with original dataset\n", " df = df.merge(predictions_df, on='datetime', how='left')\n", "\n", " # Update solar radiation column where missing\n", " df['solarradiation'] = df['solarradiation'].fillna(df['solarradiation_predicted'])\n", "\n", " # Remove temporary column\n", " df = df.drop('solarradiation_predicted', axis=1)\n", "\n", " print(f\"Added {len(predictions)} predictions to dataset\")\n", " print(f\"Rows with solar radiation after integration: {df['solarradiation'].notna().sum()}\")\n", "\n", " return df" ], "id": "93bc105a8dad086d" }, { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": null, "source": [ "df = pd.read_parquet('../../sources/weather_data_uvindex.parquet')\n", "\n", "# Data preparation\n", "print(\"\\n1. 
Preparing data...\")\n", "X_train_seq, X_test_seq, y_train, y_test, feature_scaler, target_scaler, features, X_to_predict_seq = prepare_hybrid_data(df)\n", "\n", "print(f\"Training data shape: {X_train_seq.shape}\")\n", "print(f\"Test data shape: {X_test_seq.shape}\")\n", "\n", "# Save or load scalers and features\n", "feature_scaler_path = f'{folder_name}_feature_scaler.joblib'\n", "target_scaler_path = f'{folder_name}_target_scaler.joblib'\n", "features_path = f'{folder_name}_features.json'\n", "model_path = f'{folder_name}_best_model.h5'\n", "history_path = f'{folder_name}_training_history.json'\n", "\n", "if os.path.exists(feature_scaler_path):\n", " print(f\"Loading existing feature scaler from: {feature_scaler_path}\")\n", " feature_scaler = joblib.load(feature_scaler_path)\n", "else:\n", " print(f\"Saving feature scaler to: {feature_scaler_path}\")\n", " joblib.dump(feature_scaler, feature_scaler_path)\n", "\n", "if os.path.exists(target_scaler_path):\n", " print(f\"Loading existing target scaler from: {target_scaler_path}\")\n", " target_scaler = joblib.load(target_scaler_path)\n", "else:\n", " print(f\"Saving target scaler to: {target_scaler_path}\")\n", " joblib.dump(target_scaler, target_scaler_path)\n", "\n", "if os.path.exists(features_path):\n", " print(f\"Loading existing features from: {features_path}\")\n", " with open(features_path, 'r') as f:\n", " features = json.load(f)\n", "else:\n", " print(f\"Saving features to: {features_path}\")\n", " with open(features_path, 'w') as f:\n", " json.dump(features, f)\n", "\n", "# Data quality verification\n", "if np.isnan(X_train_seq).any() or np.isnan(y_train).any():\n", " raise ValueError(\"Found NaN values in training data\")" ], "id": "8f346ed2fea89ac5" }, { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": null, "source": [ "# Model creation\n", "print(\"\\n2. Creating model...\")\n", "input_shape = (X_train_seq.shape[1], X_train_seq.shape[2])\n", "\n", "MAX_RADIATION = 885\n", "\n", "min_val_scaled = target_scaler.transform([[0]])[0][0]\n", "max_val_scaled = target_scaler.transform([[MAX_RADIATION]])[0][0]\n", "\n", "model = create_solarradiation_model(input_shape=input_shape, folder_name=folder_name, min_output=min_val_scaled, max_output=max_val_scaled)\n", "\n", "print(\"\\n3. Starting training...\")\n", "history = train_hybrid_model(\n", " model=model,\n", " X_train=X_train_seq,\n", " y_train=y_train,\n", " X_test=X_test_seq,\n", " y_test=y_test,\n", " epochs=100,\n", " batch_size=192,\n", " folder_name=folder_name\n", ")" ], "id": "38a2ad01e7fd683c" }, { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": null, "source": [ "print(\"\\n4. Generating predictions...\")\n", "predictions = model.predict(X_test_seq)\n", "# Keep predictions inside the scaled radiation range (matches the clipping applied by the output Lambda layer)\n", "predictions = np.clip(predictions, min_val_scaled, max_val_scaled)\n", "\n", "predictions_original = target_scaler.inverse_transform(predictions)\n", "y_test_original = target_scaler.inverse_transform(y_test)\n", "\n", "print(\"\\n5. Evaluating model...\")\n", "metrics = evaluate_solarradiation_predictions(y_test_original, predictions_original, folder_name=folder_name)\n", "\n", "# Create results dictionary\n", "training_results = {\n", " 'model_params': {\n", " 'input_shape': input_shape,\n", " 'n_features': len(features),\n", " 'sequence_length': X_train_seq.shape[1]\n", " },\n", " 'training_params': {\n", " 'batch_size': 192,\n", " 'total_epochs': len(history.history['loss']),\n", " 'best_epoch': np.argmin(history.history['val_loss']) + 1\n", " },\n", " 'performance_metrics': {\n", " 'final_loss': float(history.history['val_loss'][-1]),\n", " 'final_mae': float(history.history['val_mae'][-1]),\n", " 'best_val_loss': float(min(history.history['val_loss'])),\n", " 'out_of_range_predictions': int(np.sum((predictions_original < 0) | (predictions_original > MAX_RADIATION)))\n", " }\n", "}" ], "id": "d9d0c8e71043aa2e" }, { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": null, "source": [ "print(\"\\n6. Predicting missing data...\")\n", "to_predict_predictions = model.predict(X_to_predict_seq)\n", "# Bring the predictions back to W/m² before merging them with the observed data\n", "to_predict_predictions = target_scaler.inverse_transform(to_predict_predictions)\n", "to_predict_predictions = np.clip(to_predict_predictions, 0, MAX_RADIATION)\n", "\n", "print(\"\\n7. Integrating predictions into original dataset...\")\n", "df_updated = integrate_predictions(df.copy(), to_predict_predictions)\n", "\n", "df_updated.to_parquet('../../sources/weather_data_solarradiation.parquet')\n", "\n", "# Add prediction statistics to training_results\n", "training_results['prediction_stats'] = {\n", " 'n_predictions_added': len(to_predict_predictions),\n", " 'mean_predicted_solarradiation': float(to_predict_predictions.mean()),\n", " 'min_predicted_solarradiation': float(to_predict_predictions.min()),\n", " 'max_predicted_solarradiation': float(to_predict_predictions.max()),\n", "}\n", "\n", "print(\"\\nTraining completed successfully!\")" ], "id": "de61df609e8053a2" }, { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": null, "source": [ "def plot_error_analysis(y_true, y_pred, folder_name=None):\n", " \"\"\"\n", " Function to visualize prediction error analysis\n", "\n", " Parameters:\n", " -----------\n", " y_true : array-like\n", " Actual values\n", " y_pred : array-like\n", " Predicted values\n", " folder_name : str, optional\n", " Directory to save plots. 
If None, plots are only displayed\n", "\n", " Generates:\n", " ----------\n", " - Error distribution histogram\n", " - Actual vs Predicted scatter plot\n", " - Errors vs Actual Values scatter plot\n", " - Comprehensive error statistics\n", " \"\"\"\n", "\n", " # Convert to 1D numpy arrays if needed\n", " if isinstance(y_true, pd.Series):\n", " y_true = y_true.values\n", " if isinstance(y_pred, pd.Series):\n", " y_pred = y_pred.values\n", "\n", " y_true = y_true.ravel()\n", " y_pred = y_pred.ravel()\n", "\n", " # Calculate errors\n", " errors = y_pred - y_true\n", "\n", " # Create main figure\n", " fig = plt.figure(figsize=(15, 5))\n", "\n", " # Plot 1: Error Distribution\n", " plt.subplot(1, 3, 1)\n", " plt.hist(errors, bins=50, alpha=0.7)\n", " plt.title('Prediction Error Distribution')\n", " plt.xlabel('Error')\n", " plt.ylabel('Frequency')\n", "\n", " # Plot 2: Actual vs Predicted\n", " plt.subplot(1, 3, 2)\n", " plt.scatter(y_true, y_pred, alpha=0.5)\n", " plt.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'r--', lw=2)\n", " plt.title('Actual vs Predicted Values')\n", " plt.xlabel('Actual Values')\n", " plt.ylabel('Predicted Values')\n", "\n", " # Plot 3: Errors vs Actual Values\n", " plt.subplot(1, 3, 3)\n", " plt.scatter(y_true, errors, alpha=0.5)\n", " plt.axhline(y=0, color='r', linestyle='--')\n", " plt.title('Errors vs Actual Values')\n", " plt.xlabel('Actual Values')\n", " plt.ylabel('Error')\n", "\n", " plt.tight_layout()\n", "\n", " # Save plot if directory is specified\n", " if folder_name is not None:\n", " try:\n", " # Create directory if it doesn't exist\n", " filename = f'{folder_name}_error_analysis.png'\n", "\n", " # Save figure\n", " plt.savefig(filename, dpi=300, bbox_inches='tight')\n", " print(f\"\\nPlot saved as: {filename}\")\n", " except Exception as e:\n", " print(f\"\\nError saving plot: {str(e)}\")\n", "\n", " plt.show()\n", "\n", " # Print error statistics\n", " print(\"\\nError Statistics:\")\n", " print(f\"MAE: {np.mean(np.abs(errors)):.4f}\")\n", " print(f\"MSE: {np.mean(errors ** 2):.4f}\")\n", " print(f\"RMSE: {np.sqrt(np.mean(errors ** 2)):.4f}\")\n", " print(f\"Mean error: {np.mean(errors):.4f}\")\n", " print(f\"Error std: {np.std(errors):.4f}\")\n", "\n", " # Calculate percentage of errors within thresholds\n", " thresholds = [0.5, 1.0, 1.5, 2.0]\n", " for threshold in thresholds:\n", " within_threshold = np.mean(np.abs(errors) <= threshold) * 100\n", " print(f\"Predictions within ±{threshold}: {within_threshold:.1f}%\")\n", "\n", "\n", "# Example usage\n", "plot_error_analysis(y_test, predictions, folder_name=folder_name)" ], "id": "8a924adc70eb2f30" } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0rc1" } }, "nbformat": 4, "nbformat_minor": 5 }