{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "vwMqHwWTthA4"
|
|
},
|
|
"source": [
|
|
"# Analisi e Previsione della Produzione di Olio d'Oliva\n",
|
|
"\n",
|
|
"Questo notebook esplora la relazione tra i dati meteorologici e la produzione annuale di olio d'oliva, con l'obiettivo di creare un modello predittivo."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {},
|
|
"source": [
|
|
"!apt-get update\n",
|
|
"!apt-get install graphviz -y\n",
|
|
"\n",
|
|
"!pip install tensorflow\n",
|
|
"!pip install numpy\n",
|
|
"!pip install pandas\n",
|
|
"\n",
|
|
"!pip install keras\n",
|
|
"!pip install scikit-learn\n",
|
|
"!pip install matplotlib\n",
|
|
"!pip install joblib\n",
|
|
"!pip install pyarrow\n",
|
|
"!pip install fastparquet\n",
|
|
"!pip install scipy\n",
|
|
"!pip install seaborn\n",
|
|
"!pip install tqdm\n",
|
|
"!pip install pydot\n",
|
|
"!pip install tensorflow-io"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "VqHdVCiJthA6",
|
|
"outputId": "d8f830c1-5342-4e11-ac3c-96c535aad5fd"
|
|
},
|
|
"source": [
|
|
"import tensorflow as tf\n",
|
|
"import keras\n",
|
|
"\n",
|
|
"print(f\"Keras version: {keras.__version__}\")\n",
|
|
"print(f\"TensorFlow version: {tf.__version__}\")\n",
|
|
"print(f\"CUDA available: {tf.test.is_built_with_cuda()}\")\n",
|
|
"print(f\"GPU devices: {tf.config.list_physical_devices('GPU')}\")\n",
|
|
"\n",
|
|
"# GPU configuration\n",
|
|
"gpus = tf.config.experimental.list_physical_devices('GPU')\n",
|
|
"if gpus:\n",
|
|
" try:\n",
|
|
" for gpu in gpus:\n",
|
|
" tf.config.experimental.set_memory_growth(gpu, True)\n",
|
|
" logical_gpus = tf.config.experimental.list_logical_devices('GPU')\n",
|
|
" print(len(gpus), \"Physical GPUs,\", len(logical_gpus), \"Logical GPUs\")\n",
|
|
" except RuntimeError as e:\n",
|
|
" print(e)"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 160
|
|
},
|
|
"id": "cz0NU95IthA7",
|
|
"outputId": "eaf1939a-7708-49ad-adc9-bac4e2448e10"
|
|
},
|
|
"source": [
|
|
"# Test semplice per verificare che la GPU funzioni\n",
|
|
"def test_gpu():\n",
|
|
" print(\"TensorFlow version:\", tf.__version__)\n",
|
|
" print(\"\\nDispositivi disponibili:\")\n",
|
|
" print(tf.config.list_physical_devices())\n",
|
|
"\n",
|
|
" # Creiamo e moltiplichiamo due tensori sulla GPU\n",
|
|
" with tf.device('/GPU:0'):\n",
|
|
" a = tf.random.normal([10000, 10000])\n",
|
|
" b = tf.random.normal([10000, 10000])\n",
|
|
" c = tf.matmul(a, b)\n",
|
|
"\n",
|
|
" print(\"\\nShape del risultato:\", c.shape)\n",
|
|
" print(\"Device del tensore:\", c.device)\n",
|
|
" return \"Test completato con successo!\"\n",
|
|
"\n",
|
|
"\n",
|
|
"test_gpu()"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "VYNuYASythA8"
|
|
},
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import seaborn as sns\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
|
|
"from tensorflow.keras.layers import Input, Dense, Dropout, Bidirectional, LSTM, LayerNormalization, Add, Activation, BatchNormalization, MultiHeadAttention, MaxPooling1D, Conv1D, GlobalMaxPooling1D, GlobalAveragePooling1D, \\\n",
|
|
" Concatenate, ZeroPadding1D, Lambda, AveragePooling1D, concatenate\n",
|
|
"from tensorflow.keras.layers import Dense, LSTM, Conv1D, Input, concatenate, Dropout, BatchNormalization, GlobalAveragePooling1D, Bidirectional, TimeDistributed, Attention, MultiHeadAttention\n",
|
|
"from tensorflow.keras.models import Model\n",
|
|
"from tensorflow.keras.regularizers import l2\n",
|
|
"from tensorflow.keras.optimizers import Adam\n",
|
|
"from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint\n",
|
|
"from datetime import datetime\n",
|
|
"import os\n",
|
|
"import json\n",
|
|
"import joblib\n",
|
|
"import re\n",
|
|
"import pyarrow as pa\n",
|
|
"import pyarrow.parquet as pq\n",
|
|
"from tqdm import tqdm\n",
|
|
"from concurrent.futures import ProcessPoolExecutor, as_completed\n",
|
|
"from functools import partial\n",
|
|
"import psutil\n",
|
|
"import multiprocessing\n",
|
|
"\n",
|
|
"random_state_value = 42\n",
|
|
"\n",
|
|
"base_project_dir = './kaggle/working/'\n",
|
|
"data_project_dir = base_project_dir + 'data/'\n",
|
|
"models_project_dir = base_project_dir + 'models/'\n",
|
|
"\n",
|
|
"os.makedirs(base_project_dir, exist_ok=True)\n",
|
|
"os.makedirs(data_project_dir, exist_ok=True)\n",
|
|
"os.makedirs(models_project_dir, exist_ok=True)"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "uHKkULSNthA8"
|
|
},
|
|
"source": [
|
|
"## Funzioni di Plot"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "gzvYVaBPthA8"
|
|
},
|
|
"source": [
|
|
"def save_plot(plt, title, output_dir='./kaggle/working/plots'):\n",
|
|
" os.makedirs(output_dir, exist_ok=True)\n",
|
|
" filename = \"\".join(x for x in title if x.isalnum() or x in [' ', '-', '_']).rstrip()\n",
|
|
" filename = filename.replace(' ', '_').lower()\n",
|
|
" filepath = os.path.join(output_dir, f\"{filename}.png\")\n",
|
|
" plt.savefig(filepath, bbox_inches='tight', dpi=300)\n",
|
|
" print(f\"Plot salvato come: {filepath}\")\n",
|
|
"\n",
|
|
"\n",
|
|
"def to_camel_case(text):\n",
|
|
" \"\"\"\n",
|
|
" Converte una stringa in camelCase.\n",
|
|
" Gestisce stringhe con spazi, trattini o underscore.\n",
|
|
" Se è una sola parola, la restituisce in minuscolo.\n",
|
|
" \"\"\"\n",
|
|
" # Rimuove eventuali spazi iniziali e finali\n",
|
|
" text = text.strip()\n",
|
|
"\n",
|
|
" # Se la stringa è vuota, ritorna stringa vuota\n",
|
|
" if not text:\n",
|
|
" return \"\"\n",
|
|
"\n",
|
|
" # Sostituisce trattini e underscore con spazi\n",
|
|
" text = text.replace('-', ' ').replace('_', ' ')\n",
|
|
"\n",
|
|
" # Divide la stringa in parole\n",
|
|
" words = text.split()\n",
|
|
"\n",
|
|
" # Se non ci sono parole dopo lo split, ritorna stringa vuota\n",
|
|
" if not words:\n",
|
|
" return \"\"\n",
|
|
"\n",
|
|
" # Se c'è una sola parola, ritorna in minuscolo\n",
|
|
" if len(words) == 1:\n",
|
|
" return words[0].lower()\n",
|
|
"\n",
|
|
" # Altrimenti procedi con il camelCase\n",
|
|
" result = words[0].lower()\n",
|
|
" for word in words[1:]:\n",
|
|
" result += word.capitalize()\n",
|
|
"\n",
|
|
" return result"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
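{
"cell_type": "code",
"metadata": {},
"source": [
"# Quick, illustrative sanity check of the helpers above; the strings below are made-up examples\n",
"print(to_camel_case('Produzione Olio Oliva'))  # -> 'produzioneOlioOliva'\n",
"print(to_camel_case('solar-radiation_daily'))  # -> 'solarRadiationDaily'\n",
"print(to_camel_case('temp'))                   # -> 'temp'"
],
"outputs": [],
"execution_count": null
},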
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "lhipxRbMthA8"
|
|
},
|
|
"source": [
|
|
"## 1. Caricamento e preparazione dei Dati Meteo"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Function to convert csv to parquet\n",
|
|
"def csv_to_parquet(csv_file, parquet_file, chunksize=100000):\n",
|
|
" writer = None\n",
|
|
"\n",
|
|
" for chunk in pd.read_csv(csv_file, chunksize=chunksize):\n",
|
|
" if writer is None:\n",
|
|
"\n",
|
|
" table = pa.Table.from_pandas(chunk)\n",
|
|
" writer = pq.ParquetWriter(parquet_file, table.schema)\n",
|
|
" else:\n",
|
|
" table = pa.Table.from_pandas(chunk)\n",
|
|
"\n",
|
|
" writer.write_table(table)\n",
|
|
"\n",
|
|
" if writer:\n",
|
|
" writer.close()\n",
|
|
"\n",
|
|
" print(f\"File conversion completed : {csv_file} -> {parquet_file}\")\n",
|
|
"\n",
|
|
"\n",
|
|
"def read_json_files(folder_path):\n",
|
|
" all_data = []\n",
|
|
"\n",
|
|
" file_list = sorted(os.listdir(folder_path))\n",
|
|
"\n",
|
|
" for filename in file_list:\n",
|
|
" if filename.endswith('.json'):\n",
|
|
" file_path = os.path.join(folder_path, filename)\n",
|
|
" try:\n",
|
|
" with open(file_path, 'r') as file:\n",
|
|
" data = json.load(file)\n",
|
|
" all_data.extend(data['days'])\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"Error processing file '{filename}': {str(e)}\")\n",
|
|
"\n",
|
|
" return all_data\n",
|
|
"\n",
|
|
"\n",
|
|
"def create_weather_dataset(data):\n",
|
|
" dataset = []\n",
|
|
" seen_datetimes = set()\n",
|
|
"\n",
|
|
" for day in data:\n",
|
|
" date = day['datetime']\n",
|
|
" for hour in day['hours']:\n",
|
|
" datetime_str = f\"{date} {hour['datetime']}\"\n",
|
|
"\n",
|
|
" # Verifico se questo datetime è già stato visto\n",
|
|
" if datetime_str in seen_datetimes:\n",
|
|
" continue\n",
|
|
"\n",
|
|
" seen_datetimes.add(datetime_str)\n",
|
|
"\n",
|
|
" if isinstance(hour['preciptype'], list):\n",
|
|
" preciptype = \"__\".join(hour['preciptype'])\n",
|
|
" else:\n",
|
|
" preciptype = hour['preciptype'] if hour['preciptype'] else \"\"\n",
|
|
"\n",
|
|
" conditions = hour['conditions'].replace(', ', '__').replace(' ', '_').lower()\n",
|
|
"\n",
|
|
" row = {\n",
|
|
" 'datetime': datetime_str,\n",
|
|
" 'temp': hour['temp'],\n",
|
|
" 'feelslike': hour['feelslike'],\n",
|
|
" 'humidity': hour['humidity'],\n",
|
|
" 'dew': hour['dew'],\n",
|
|
" 'precip': hour['precip'],\n",
|
|
" 'snow': hour['snow'],\n",
|
|
" 'preciptype': preciptype.lower(),\n",
|
|
" 'windspeed': hour['windspeed'],\n",
|
|
" 'winddir': hour['winddir'],\n",
|
|
" 'pressure': hour['pressure'],\n",
|
|
" 'cloudcover': hour['cloudcover'],\n",
|
|
" 'visibility': hour['visibility'],\n",
|
|
" 'solarradiation': hour['solarradiation'],\n",
|
|
" 'solarenergy': hour['solarenergy'],\n",
|
|
" 'uvindex': hour['uvindex'],\n",
|
|
" 'conditions': conditions,\n",
|
|
" 'tempmax': day['tempmax'],\n",
|
|
" 'tempmin': day['tempmin'],\n",
|
|
" 'precipprob': day['precipprob'],\n",
|
|
" 'precipcover': day['precipcover']\n",
|
|
" }\n",
|
|
" dataset.append(row)\n",
|
|
"\n",
|
|
" dataset.sort(key=lambda x: datetime.strptime(x['datetime'], \"%Y-%m-%d %H:%M:%S\"))\n",
|
|
"\n",
|
|
" return pd.DataFrame(dataset)\n",
|
|
"\n"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
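{
"cell_type": "code",
"metadata": {},
"source": [
"# Tiny self-contained check of csv_to_parquet (illustrative only): write a throwaway CSV\n",
"# inside data_project_dir, convert it in chunks, then read the Parquet file back.\n",
"demo_csv = os.path.join(data_project_dir, 'demo_weather.csv')\n",
"demo_parquet = os.path.join(data_project_dir, 'demo_weather.parquet')\n",
"pd.DataFrame({'temp': [21.5, 22.1, 19.8], 'humidity': [60, 55, 70]}).to_csv(demo_csv, index=False)\n",
"csv_to_parquet(demo_csv, demo_parquet, chunksize=2)\n",
"print(pd.read_parquet(demo_parquet))"
],
"outputs": [],
"execution_count": null
},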
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Crea le sequenze per LSTM\n",
|
|
"def create_sequences(timesteps, X, y=None):\n",
|
|
" \"\"\"\n",
|
|
" Crea sequenze temporali dai dati.\n",
|
|
" \n",
|
|
" Parameters:\n",
|
|
" -----------\n",
|
|
" X : array-like\n",
|
|
" Dati di input\n",
|
|
" timesteps : int\n",
|
|
" Numero di timestep per ogni sequenza\n",
|
|
" y : array-like, optional\n",
|
|
" Target values. Se None, crea sequenze solo per X\n",
|
|
" \n",
|
|
" Returns:\n",
|
|
" --------\n",
|
|
" tuple o array\n",
|
|
" Se y è fornito: (X_sequences, y_sequences)\n",
|
|
" Se y è None: X_sequences\n",
|
|
" \"\"\"\n",
|
|
" Xs = []\n",
|
|
" for i in range(len(X) - timesteps):\n",
|
|
" Xs.append(X[i:i + timesteps])\n",
|
|
"\n",
|
|
" if y is not None:\n",
|
|
" ys = []\n",
|
|
" for i in range(len(X) - timesteps):\n",
|
|
" ys.append(y[i + timesteps])\n",
|
|
" return np.array(Xs), np.array(ys)\n",
|
|
"\n",
|
|
" return np.array(Xs)\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_season(date):\n",
|
|
" month = date.month\n",
|
|
" day = date.day\n",
|
|
" if (month == 12 and day >= 21) or (month <= 3 and day < 20):\n",
|
|
" return 'Winter'\n",
|
|
" elif (month == 3 and day >= 20) or (month <= 6 and day < 21):\n",
|
|
" return 'Spring'\n",
|
|
" elif (month == 6 and day >= 21) or (month <= 9 and day < 23):\n",
|
|
" return 'Summer'\n",
|
|
" elif (month == 9 and day >= 23) or (month <= 12 and day < 21):\n",
|
|
" return 'Autumn'\n",
|
|
" else:\n",
|
|
" return 'Unknown'\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_time_period(hour):\n",
|
|
" if 5 <= hour < 12:\n",
|
|
" return 'Morning'\n",
|
|
" elif 12 <= hour < 17:\n",
|
|
" return 'Afternoon'\n",
|
|
" elif 17 <= hour < 21:\n",
|
|
" return 'Evening'\n",
|
|
" else:\n",
|
|
" return 'Night'\n",
|
|
"\n",
|
|
"\n",
|
|
"def add_time_features(df):\n",
|
|
" df['datetime'] = pd.to_datetime(df['datetime'])\n",
|
|
" df['timestamp'] = df['datetime'].astype(np.int64) // 10 ** 9\n",
|
|
" df['year'] = df['datetime'].dt.year\n",
|
|
" df['month'] = df['datetime'].dt.month\n",
|
|
" df['day'] = df['datetime'].dt.day\n",
|
|
" df['hour'] = df['datetime'].dt.hour\n",
|
|
" df['minute'] = df['datetime'].dt.minute\n",
|
|
" df['hour_sin'] = np.sin(df['hour'] * (2 * np.pi / 24))\n",
|
|
" df['hour_cos'] = np.cos(df['hour'] * (2 * np.pi / 24))\n",
|
|
" df['day_of_week'] = df['datetime'].dt.dayofweek\n",
|
|
" df['day_of_year'] = df['datetime'].dt.dayofyear\n",
|
|
" df['week_of_year'] = df['datetime'].dt.isocalendar().week.astype(int)\n",
|
|
" df['quarter'] = df['datetime'].dt.quarter\n",
|
|
" df['is_month_end'] = df['datetime'].dt.is_month_end.astype(int)\n",
|
|
" df['is_quarter_end'] = df['datetime'].dt.is_quarter_end.astype(int)\n",
|
|
" df['is_year_end'] = df['datetime'].dt.is_year_end.astype(int)\n",
|
|
" df['month_sin'] = np.sin(df['month'] * (2 * np.pi / 12))\n",
|
|
" df['month_cos'] = np.cos(df['month'] * (2 * np.pi / 12))\n",
|
|
" df['day_of_year_sin'] = np.sin(df['day_of_year'] * (2 * np.pi / 365.25))\n",
|
|
" df['day_of_year_cos'] = np.cos(df['day_of_year'] * (2 * np.pi / 365.25))\n",
|
|
" df['season'] = df['datetime'].apply(get_season)\n",
|
|
" df['time_period'] = df['hour'].apply(get_time_period)\n",
|
|
" return df\n",
|
|
"\n",
|
|
"\n",
|
|
"def add_solar_features(df):\n",
|
|
" # Calcolo dell'angolo solare\n",
|
|
" df['solar_angle'] = np.sin(df['day_of_year'] * (2 * np.pi / 365.25)) * np.sin(df['hour'] * (2 * np.pi / 24))\n",
|
|
"\n",
|
|
" # Interazioni tra features rilevanti\n",
|
|
" df['cloud_temp_interaction'] = df['cloudcover'] * df['temp']\n",
|
|
" df['visibility_cloud_interaction'] = df['visibility'] * (100 - df['cloudcover'])\n",
|
|
"\n",
|
|
" # Feature derivate\n",
|
|
" df['clear_sky_index'] = (100 - df['cloudcover']) / 100\n",
|
|
" df['temp_gradient'] = df['temp'] - df['tempmin']\n",
|
|
"\n",
|
|
" return df\n",
|
|
"\n",
|
|
"\n",
|
|
"def add_solar_specific_features(df):\n",
|
|
" # Angolo solare e durata del giorno\n",
|
|
" df['day_length'] = 12 + 3 * np.sin(2 * np.pi * (df['day_of_year'] - 81) / 365.25)\n",
|
|
" df['solar_noon'] = 12 - df['hour']\n",
|
|
" df['solar_elevation'] = np.sin(2 * np.pi * df['day_of_year'] / 365.25) * np.cos(2 * np.pi * df['solar_noon'] / 24)\n",
|
|
"\n",
|
|
" # Interazioni\n",
|
|
" df['cloud_elevation'] = df['cloudcover'] * df['solar_elevation']\n",
|
|
" df['visibility_elevation'] = df['visibility'] * df['solar_elevation']\n",
|
|
"\n",
|
|
" # Rolling features con finestre più ampie\n",
|
|
" df['cloud_rolling_12h'] = df['cloudcover'].rolling(window=12).mean()\n",
|
|
" df['temp_rolling_12h'] = df['temp'].rolling(window=12).mean()\n",
|
|
"\n",
|
|
" return df\n",
|
|
"\n",
|
|
"\n",
|
|
"def add_advanced_features(df):\n",
|
|
" # Features esistenti\n",
|
|
" df = add_time_features(df)\n",
|
|
" df = add_solar_features(df)\n",
|
|
" df = add_solar_specific_features(df)\n",
|
|
"\n",
|
|
" # Aggiungi interazioni tra variabili meteorologiche\n",
|
|
" df['temp_humidity'] = df['temp'] * df['humidity']\n",
|
|
" df['temp_cloudcover'] = df['temp'] * df['cloudcover']\n",
|
|
" df['visibility_cloudcover'] = df['visibility'] * df['cloudcover']\n",
|
|
"\n",
|
|
" # Features derivate per la radiazione solare\n",
|
|
" df['clear_sky_factor'] = (100 - df['cloudcover']) / 100\n",
|
|
" df['day_length'] = np.sin(df['day_of_year_sin']) * 12 + 12 # approssimazione della durata del giorno\n",
|
|
"\n",
|
|
" # Lag features\n",
|
|
" df['temp_1h_lag'] = df['temp'].shift(1)\n",
|
|
" df['cloudcover_1h_lag'] = df['cloudcover'].shift(1)\n",
|
|
" df['humidity_1h_lag'] = df['humidity'].shift(1)\n",
|
|
"\n",
|
|
" # Rolling means\n",
|
|
" df['temp_rolling_mean_6h'] = df['temp'].rolling(window=6).mean()\n",
|
|
" df['cloudcover_rolling_mean_6h'] = df['cloudcover'].rolling(window=6).mean()\n",
|
|
"\n",
|
|
" return df\n",
|
|
"\n",
|
|
"\n",
|
|
"# Preparazione dati\n",
|
|
"def prepare_solar_data(weather_data, features):\n",
|
|
" \"\"\"\n",
|
|
" Prepara i dati per i modelli solari.\n",
|
|
" \"\"\"\n",
|
|
" # Aggiungi le caratteristiche temporali\n",
|
|
" weather_data = add_advanced_features(weather_data)\n",
|
|
" weather_data = pd.get_dummies(weather_data, columns=['season', 'time_period'], drop_first=True)\n",
|
|
"\n",
|
|
" # Dividi i dati\n",
|
|
" data_after_2010 = weather_data[weather_data['year'] >= 2010].copy()\n",
|
|
" data_after_2010 = data_after_2010.sort_values('datetime')\n",
|
|
" data_after_2010.set_index('datetime', inplace=True)\n",
|
|
"\n",
|
|
" # Interpola valori mancanti\n",
|
|
" target_variables = ['solarradiation', 'solarenergy', 'uvindex']\n",
|
|
" for column in target_variables:\n",
|
|
" data_after_2010[column] = data_after_2010[column].interpolate(method='time')\n",
|
|
"\n",
|
|
" # Rimuovi righe con valori mancanti\n",
|
|
" data_after_2010.dropna(subset=features + target_variables, inplace=True)\n",
|
|
"\n",
|
|
" # Prepara X e y\n",
|
|
" X = data_after_2010[features].values\n",
|
|
" y = data_after_2010[target_variables].values\n",
|
|
"\n",
|
|
" # Normalizza features\n",
|
|
" scaler_X = MinMaxScaler()\n",
|
|
" X_scaled = scaler_X.fit_transform(X)\n",
|
|
"\n",
|
|
" scaler_y = MinMaxScaler()\n",
|
|
" y_scaled = scaler_y.fit_transform(y)\n",
|
|
"\n",
|
|
" return X_scaled, scaler_X, y_scaled, scaler_y, data_after_2010\n",
|
|
"\n",
|
|
"\n",
|
|
"def prepare_model_specific_data(X_scaled, y, target_idx, timesteps):\n",
|
|
" \"\"\"\n",
|
|
" Prepara i dati specifici per ciascun modello.\n",
|
|
" \"\"\"\n",
|
|
" # Scaler specifico per il target\n",
|
|
" scaler_y = MinMaxScaler()\n",
|
|
" y_scaled = scaler_y.fit_transform(y[:, target_idx].reshape(-1, 1))\n",
|
|
"\n",
|
|
" # Split dei dati\n",
|
|
" X_train, X_temp, y_train, y_temp = train_test_split(\n",
|
|
" X_scaled, y_scaled, test_size=0.3, shuffle=False\n",
|
|
" )\n",
|
|
" X_val, X_test, y_val, y_test = train_test_split(\n",
|
|
" X_temp, y_temp, test_size=0.5, shuffle=False\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Crea sequenze\n",
|
|
" X_train_seq, y_train_seq = create_sequences(timesteps, X_train, y_train)\n",
|
|
" X_val_seq, y_val_seq = create_sequences(timesteps, X_val, y_val)\n",
|
|
" X_test_seq, y_test_seq = create_sequences(timesteps, X_test, y_test)\n",
|
|
"\n",
|
|
" return {\n",
|
|
" 'train': (X_train_seq, y_train_seq),\n",
|
|
" 'val': (X_val_seq, y_val_seq),\n",
|
|
" 'test': (X_test_seq, y_test_seq)\n",
|
|
" }, scaler_y\n",
|
|
"\n",
|
|
"\n",
|
|
"def create_radiation_model(input_shape, solar_params_shape=(3,)):\n",
|
|
" \"\"\"\n",
|
|
" Modello per la radiazione solare con vincoli di non-negatività.\n",
|
|
" \"\"\"\n",
|
|
" # Input layers\n",
|
|
" main_input = Input(shape=input_shape, name='main_input')\n",
|
|
" solar_input = Input(shape=solar_params_shape, name='solar_params')\n",
|
|
"\n",
|
|
" # Branch CNN\n",
|
|
" x1 = Conv1D(32, 3, padding='same')(main_input)\n",
|
|
" x1 = BatchNormalization()(x1)\n",
|
|
" x1 = Activation('relu')(x1)\n",
|
|
" x1 = Conv1D(64, 3, padding='same')(x1)\n",
|
|
" x1 = BatchNormalization()(x1)\n",
|
|
" x1 = Activation('relu')(x1)\n",
|
|
" x1 = GlobalAveragePooling1D()(x1)\n",
|
|
"\n",
|
|
" # Branch LSTM\n",
|
|
" x2 = Bidirectional(LSTM(64, return_sequences=True))(main_input)\n",
|
|
" x2 = Bidirectional(LSTM(32))(x2)\n",
|
|
" x2 = BatchNormalization()(x2)\n",
|
|
"\n",
|
|
" # Solar parameters processing\n",
|
|
" x3 = Dense(32)(solar_input)\n",
|
|
" x3 = BatchNormalization()(x3)\n",
|
|
" x3 = Activation('relu')(x3)\n",
|
|
"\n",
|
|
" # Combine all branches\n",
|
|
" x = concatenate([x1, x2, x3])\n",
|
|
"\n",
|
|
" # Dense layers with non-negativity constraints\n",
|
|
" x = Dense(64, kernel_constraint=tf.keras.constraints.NonNeg())(x)\n",
|
|
" x = BatchNormalization()(x)\n",
|
|
" x = Activation('relu')(x)\n",
|
|
" x = Dropout(0.2)(x)\n",
|
|
"\n",
|
|
" x = Dense(32, kernel_constraint=tf.keras.constraints.NonNeg())(x)\n",
|
|
" x = BatchNormalization()(x)\n",
|
|
" x = Activation('relu')(x)\n",
|
|
"\n",
|
|
" # Output layer con vincoli di non-negatività\n",
|
|
" output = Dense(1,\n",
|
|
" kernel_constraint=tf.keras.constraints.NonNeg(),\n",
|
|
" activation='relu')(x)\n",
|
|
"\n",
|
|
" model = Model(inputs=[main_input, solar_input], outputs=output, name=\"SolarRadiation\")\n",
|
|
" return model\n",
|
|
"\n",
|
|
"\n",
|
|
"def create_energy_model(input_shape):\n",
|
|
" \"\"\"\n",
|
|
" Modello migliorato per l'energia solare che sfrutta la relazione con la radiazione.\n",
|
|
" Include vincoli di non-negatività e migliore gestione delle dipendenze temporali.\n",
|
|
" \"\"\"\n",
|
|
" inputs = Input(shape=input_shape)\n",
|
|
"\n",
|
|
" # Branch 1: Elaborazione temporale con attention\n",
|
|
" # Multi-head attention per catturare relazioni temporali\n",
|
|
" x1 = MultiHeadAttention(num_heads=8, key_dim=32)(inputs, inputs)\n",
|
|
" x1 = BatchNormalization()(x1)\n",
|
|
" x1 = Activation('relu')(x1)\n",
|
|
"\n",
|
|
" # Temporal Convolution branch per catturare pattern locali\n",
|
|
" x2 = Conv1D(\n",
|
|
" filters=64,\n",
|
|
" kernel_size=3,\n",
|
|
" padding='same',\n",
|
|
" kernel_constraint=tf.keras.constraints.NonNeg()\n",
|
|
" )(inputs)\n",
|
|
" x2 = BatchNormalization()(x2)\n",
|
|
" x2 = Activation('relu')(x2)\n",
|
|
" x2 = Conv1D(\n",
|
|
" filters=32,\n",
|
|
" kernel_size=3,\n",
|
|
" padding='same',\n",
|
|
" kernel_constraint=tf.keras.constraints.NonNeg()\n",
|
|
" )(x2)\n",
|
|
" x2 = BatchNormalization()(x2)\n",
|
|
" x2 = Activation('relu')(x2)\n",
|
|
"\n",
|
|
" # LSTM branch per memoria a lungo termine\n",
|
|
" x3 = LSTM(64, return_sequences=True)(inputs)\n",
|
|
" x3 = LSTM(32, return_sequences=False)(x3)\n",
|
|
" x3 = BatchNormalization()(x3)\n",
|
|
" x3 = Activation('relu')(x3)\n",
|
|
"\n",
|
|
" # Global pooling per ogni branch\n",
|
|
" x1 = GlobalAveragePooling1D()(x1)\n",
|
|
" x2 = GlobalAveragePooling1D()(x2)\n",
|
|
"\n",
|
|
" # Concatena tutti i branch\n",
|
|
" x = concatenate([x1, x2, x3])\n",
|
|
"\n",
|
|
" # Dense layers con vincoli di non-negatività\n",
|
|
" x = Dense(\n",
|
|
" 128,\n",
|
|
" kernel_constraint=tf.keras.constraints.NonNeg(),\n",
|
|
" kernel_regularizer=l2(0.01)\n",
|
|
" )(x)\n",
|
|
" x = BatchNormalization()(x)\n",
|
|
" x = Activation('relu')(x)\n",
|
|
" x = Dropout(0.3)(x)\n",
|
|
"\n",
|
|
" x = Dense(\n",
|
|
" 64,\n",
|
|
" kernel_constraint=tf.keras.constraints.NonNeg(),\n",
|
|
" kernel_regularizer=l2(0.01)\n",
|
|
" )(x)\n",
|
|
" x = BatchNormalization()(x)\n",
|
|
" x = Activation('relu')(x)\n",
|
|
" x = Dropout(0.2)(x)\n",
|
|
"\n",
|
|
" # Output layer con vincolo di non-negatività\n",
|
|
" output = Dense(\n",
|
|
" 1,\n",
|
|
" kernel_constraint=tf.keras.constraints.NonNeg(),\n",
|
|
" activation='relu', # Garantisce output non negativo\n",
|
|
" kernel_regularizer=l2(0.01)\n",
|
|
" )(x)\n",
|
|
"\n",
|
|
" model = Model(inputs=inputs, outputs=output, name=\"SolarEnergy\")\n",
|
|
" return model\n",
|
|
"\n",
|
|
"\n",
|
|
"def create_uv_model(input_shape):\n",
|
|
" \"\"\"\n",
|
|
" Modello migliorato per l'indice UV che sfrutta sia radiazione che energia solare.\n",
|
|
" Include vincoli di non-negatività e considera le relazioni non lineari tra le variabili.\n",
|
|
" \"\"\"\n",
|
|
" inputs = Input(shape=input_shape)\n",
|
|
"\n",
|
|
" # CNN branch per pattern locali\n",
|
|
" x1 = Conv1D(\n",
|
|
" filters=64,\n",
|
|
" kernel_size=3,\n",
|
|
" padding='same',\n",
|
|
" kernel_constraint=tf.keras.constraints.NonNeg()\n",
|
|
" )(inputs)\n",
|
|
" x1 = BatchNormalization()(x1)\n",
|
|
" x1 = Activation('relu')(x1)\n",
|
|
" x1 = MaxPooling1D(pool_size=2)(x1)\n",
|
|
"\n",
|
|
" x1 = Conv1D(\n",
|
|
" filters=32,\n",
|
|
" kernel_size=3,\n",
|
|
" padding='same',\n",
|
|
" kernel_constraint=tf.keras.constraints.NonNeg()\n",
|
|
" )(x1)\n",
|
|
" x1 = BatchNormalization()(x1)\n",
|
|
" x1 = Activation('relu')(x1)\n",
|
|
" x1 = GlobalAveragePooling1D()(x1)\n",
|
|
"\n",
|
|
" # Attention branch per relazioni complesse\n",
|
|
" # Specialmente utile per le relazioni con radiazione ed energia\n",
|
|
" x2 = MultiHeadAttention(num_heads=4, key_dim=32)(inputs, inputs)\n",
|
|
" x2 = BatchNormalization()(x2)\n",
|
|
" x2 = Activation('relu')(x2)\n",
|
|
" x2 = GlobalAveragePooling1D()(x2)\n",
|
|
"\n",
|
|
" # Dense branch per le feature più recenti\n",
|
|
" x3 = GlobalAveragePooling1D()(inputs)\n",
|
|
" x3 = Dense(\n",
|
|
" 64,\n",
|
|
" kernel_constraint=tf.keras.constraints.NonNeg(),\n",
|
|
" kernel_regularizer=l2(0.01)\n",
|
|
" )(x3)\n",
|
|
" x3 = BatchNormalization()(x3)\n",
|
|
" x3 = Activation('relu')(x3)\n",
|
|
"\n",
|
|
" # Fusion dei branch\n",
|
|
" x = concatenate([x1, x2, x3])\n",
|
|
"\n",
|
|
" # Dense layers con vincoli di non-negatività\n",
|
|
" x = Dense(\n",
|
|
" 128,\n",
|
|
" kernel_constraint=tf.keras.constraints.NonNeg(),\n",
|
|
" kernel_regularizer=l2(0.01)\n",
|
|
" )(x)\n",
|
|
" x = BatchNormalization()(x)\n",
|
|
" x = Activation('relu')(x)\n",
|
|
" x = Dropout(0.3)(x)\n",
|
|
"\n",
|
|
" x = Dense(\n",
|
|
" 64,\n",
|
|
" kernel_constraint=tf.keras.constraints.NonNeg(),\n",
|
|
" kernel_regularizer=l2(0.01)\n",
|
|
" )(x)\n",
|
|
" x = BatchNormalization()(x)\n",
|
|
" x = Activation('relu')(x)\n",
|
|
" x = Dropout(0.2)(x)\n",
|
|
"\n",
|
|
" # Output layer con vincolo di non-negatività\n",
|
|
" output = Dense(\n",
|
|
" 1,\n",
|
|
" kernel_constraint=tf.keras.constraints.NonNeg(),\n",
|
|
" activation='relu', # Garantisce output non negativo\n",
|
|
" kernel_regularizer=l2(0.01)\n",
|
|
" )(x)\n",
|
|
"\n",
|
|
" model = Model(inputs=inputs, outputs=output, name=\"SolarUV\")\n",
|
|
" return model\n",
|
|
"\n",
|
|
"\n",
|
|
"class CustomCallback(tf.keras.callbacks.Callback):\n",
|
|
" \"\"\"\n",
|
|
" Callback personalizzato per monitorare la non-negatività delle predizioni\n",
|
|
" e altre metriche importanti durante il training.\n",
|
|
" \"\"\"\n",
|
|
"\n",
|
|
" def __init__(self, validation_data=None):\n",
|
|
" super().__init__()\n",
|
|
" self.validation_data = validation_data\n",
|
|
"\n",
|
|
" def on_epoch_end(self, epoch, logs=None):\n",
|
|
" try:\n",
|
|
" # Controlla se abbiamo i dati di validazione\n",
|
|
" if hasattr(self.model, 'validation_data'):\n",
|
|
" val_x = self.model.validation_data[0]\n",
|
|
" if isinstance(val_x, list): # Per il modello della radiazione\n",
|
|
" val_pred = self.model.predict(val_x, verbose=0)\n",
|
|
" else:\n",
|
|
" val_pred = self.model.predict(val_x, verbose=0)\n",
|
|
"\n",
|
|
" # Verifica non-negatività\n",
|
|
" if np.any(val_pred < 0):\n",
|
|
" print(\"\\nWarning: Rilevati valori negativi nelle predizioni\")\n",
|
|
" print(f\"Min value: {np.min(val_pred)}\")\n",
|
|
"\n",
|
|
" # Statistiche predizioni\n",
|
|
" print(f\"\\nStatistiche predizioni epoca {epoch}:\")\n",
|
|
" print(f\"Min: {np.min(val_pred):.4f}\")\n",
|
|
" print(f\"Max: {np.max(val_pred):.4f}\")\n",
|
|
" print(f\"Media: {np.mean(val_pred):.4f}\")\n",
|
|
"\n",
|
|
" # Aggiunge le metriche ai logs\n",
|
|
" if logs is not None:\n",
|
|
" logs['val_pred_min'] = np.min(val_pred)\n",
|
|
" logs['val_pred_max'] = np.max(val_pred)\n",
|
|
" logs['val_pred_mean'] = np.mean(val_pred)\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"\\nWarning nel CustomCallback: {str(e)}\")\n",
|
|
"\n",
|
|
"\n",
|
|
"def create_callbacks(target):\n",
|
|
" \"\"\"\n",
|
|
" Crea le callbacks per il training del modello.\n",
|
|
" \n",
|
|
" Parameters:\n",
|
|
" -----------\n",
|
|
" target : str\n",
|
|
" Nome del target per cui creare le callbacks\n",
|
|
" \n",
|
|
" Returns:\n",
|
|
" --------\n",
|
|
" list : Lista delle callbacks configurate\n",
|
|
" \"\"\"\n",
|
|
" # Crea la directory per i checkpoint e i logs\n",
|
|
" model_dir = f'./kaggle/working/models/{target}'\n",
|
|
" checkpoint_dir = os.path.join(model_dir, 'checkpoints')\n",
|
|
" log_dir = os.path.join(model_dir, 'logs')\n",
|
|
"\n",
|
|
" os.makedirs(checkpoint_dir, exist_ok=True)\n",
|
|
" os.makedirs(log_dir, exist_ok=True)\n",
|
|
"\n",
|
|
" return [\n",
|
|
" # Early Stopping\n",
|
|
" EarlyStopping(\n",
|
|
" monitor='val_loss',\n",
|
|
" patience=10,\n",
|
|
" restore_best_weights=True,\n",
|
|
" min_delta=0.0001\n",
|
|
" ),\n",
|
|
" # Reduce LR on Plateau\n",
|
|
" ReduceLROnPlateau(\n",
|
|
" monitor='val_loss',\n",
|
|
" factor=0.5,\n",
|
|
" patience=5,\n",
|
|
" min_lr=1e-6,\n",
|
|
" verbose=1\n",
|
|
" ),\n",
|
|
" # Model Checkpoint\n",
|
|
" ModelCheckpoint(\n",
|
|
" filepath=os.path.join(checkpoint_dir, 'best_model_{epoch:02d}_{val_loss:.4f}.h5'),\n",
|
|
" monitor='val_loss',\n",
|
|
" save_best_only=True,\n",
|
|
" save_weights_only=True,\n",
|
|
" verbose=1\n",
|
|
" ),\n",
|
|
" # TensorBoard\n",
|
|
" tf.keras.callbacks.TensorBoard(\n",
|
|
" log_dir=log_dir,\n",
|
|
" histogram_freq=1,\n",
|
|
" write_graph=True,\n",
|
|
" update_freq='epoch'\n",
|
|
" ),\n",
|
|
" # Custom callback\n",
|
|
" CustomCallback()\n",
|
|
" ]\n",
|
|
"\n",
|
|
"\n",
|
|
"def train_radiation_model(X_train, y_train, X_val, y_val, solar_params_train, solar_params_val,\n",
|
|
" scalers=None, **kwargs):\n",
|
|
" \"\"\"\n",
|
|
" Addestra il modello per la radiazione solare\n",
|
|
" \n",
|
|
" Parameters:\n",
|
|
" -----------\n",
|
|
" X_train : array-like\n",
|
|
" Dati di input per il training\n",
|
|
" y_train : array-like\n",
|
|
" Target per il training\n",
|
|
" X_val : array-like\n",
|
|
" Dati di input per la validazione\n",
|
|
" y_val : array-like\n",
|
|
" Target per la validazione\n",
|
|
" solar_params_train : array-like\n",
|
|
" Parametri solari per il training\n",
|
|
" solar_params_val : array-like\n",
|
|
" Parametri solari per la validazione\n",
|
|
" scalers : dict, optional\n",
|
|
" Dizionario degli scaler (es. {'X': x_scaler, 'y': y_scaler, 'solar_params': solar_params_scaler})\n",
|
|
" **kwargs : dict\n",
|
|
" Parametri aggiuntivi per il training (epochs, batch_size, etc.)\n",
|
|
" \n",
|
|
" Returns:\n",
|
|
" --------\n",
|
|
" tuple\n",
|
|
" (model, history)\n",
|
|
" \"\"\"\n",
|
|
" print(\"\\nAddestramento modello Radiation...\")\n",
|
|
"\n",
|
|
" # Crea e compila il modello\n",
|
|
" model = create_radiation_model(input_shape=X_train.shape[1:])\n",
|
|
" model.compile(\n",
|
|
" optimizer='adam',\n",
|
|
" loss='mse',\n",
|
|
" metrics=['mae', 'mse']\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Mostra il summary del modello\n",
|
|
" model.summary()\n",
|
|
"\n",
|
|
" # Addestra il modello\n",
|
|
" history = model.fit(\n",
|
|
" [X_train, solar_params_train],\n",
|
|
" y_train,\n",
|
|
" validation_data=([X_val, solar_params_val], y_val),\n",
|
|
" **kwargs\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Salva il modello con tutti gli artefatti\n",
|
|
" save_single_model_and_scalers(\n",
|
|
" model=model,\n",
|
|
" model_name='solarradiation',\n",
|
|
" scalers=scalers,\n",
|
|
" base_path='./models'\n",
|
|
" )\n",
|
|
"\n",
|
|
" print(\"\\nAddestramento completato e modello salvato!\")\n",
|
|
" return model, history\n",
|
|
"\n",
|
|
"\n",
|
|
"def train_energy_model(X_train, y_train, X_val, y_val, scalers=None, **kwargs):\n",
|
|
" \"\"\"\n",
|
|
" Addestra il modello per l'energia solare\n",
|
|
" \n",
|
|
" Parameters:\n",
|
|
" -----------\n",
|
|
" X_train : array-like\n",
|
|
" Dati di input per il training\n",
|
|
" y_train : array-like\n",
|
|
" Target per il training\n",
|
|
" X_val : array-like\n",
|
|
" Dati di input per la validazione\n",
|
|
" y_val : array-like\n",
|
|
" Target per la validazione\n",
|
|
" scalers : dict, optional\n",
|
|
" Dizionario degli scaler (es. {'X': x_scaler, 'y': y_scaler})\n",
|
|
" **kwargs : dict\n",
|
|
" Parametri aggiuntivi per il training (epochs, batch_size, etc.)\n",
|
|
" \n",
|
|
" Returns:\n",
|
|
" --------\n",
|
|
" tuple\n",
|
|
" (model, history)\n",
|
|
" \"\"\"\n",
|
|
" print(\"\\nAddestramento modello Energy...\")\n",
|
|
"\n",
|
|
" # Crea e compila il modello\n",
|
|
" model = create_energy_model(input_shape=X_train.shape[1:])\n",
|
|
" model.compile(\n",
|
|
" optimizer='adam',\n",
|
|
" loss='mse',\n",
|
|
" metrics=['mae', 'mse']\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Mostra il summary del modello\n",
|
|
" model.summary()\n",
|
|
"\n",
|
|
" # Addestra il modello\n",
|
|
" history = model.fit(\n",
|
|
" X_train,\n",
|
|
" y_train,\n",
|
|
" validation_data=(X_val, y_val),\n",
|
|
" **kwargs\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Salva il modello con tutti gli artefatti\n",
|
|
" save_single_model_and_scalers(\n",
|
|
" model=model,\n",
|
|
" model_name='solarenergy',\n",
|
|
" scalers=scalers,\n",
|
|
" base_path='./models'\n",
|
|
" )\n",
|
|
"\n",
|
|
" print(\"\\nAddestramento completato e modello salvato!\")\n",
|
|
" return model, history\n",
|
|
"\n",
|
|
"\n",
|
|
"def train_uv_model(X_train, y_train, X_val, y_val, scalers=None, **kwargs):\n",
|
|
" \"\"\"\n",
|
|
" Addestra il modello per l'indice UV\n",
|
|
" \n",
|
|
" Parameters:\n",
|
|
" -----------\n",
|
|
" X_train : array-like\n",
|
|
" Dati di input per il training\n",
|
|
" y_train : array-like\n",
|
|
" Target per il training\n",
|
|
" X_val : array-like\n",
|
|
" Dati di input per la validazione\n",
|
|
" y_val : array-like\n",
|
|
" Target per la validazione\n",
|
|
" scalers : dict, optional\n",
|
|
" Dizionario degli scaler (es. {'X': x_scaler, 'y': y_scaler})\n",
|
|
" **kwargs : dict\n",
|
|
" Parametri aggiuntivi per il training (epochs, batch_size, etc.)\n",
|
|
" \n",
|
|
" Returns:\n",
|
|
" --------\n",
|
|
" tuple\n",
|
|
" (model, history)\n",
|
|
" \"\"\"\n",
|
|
" print(\"\\nAddestramento modello UV...\")\n",
|
|
"\n",
|
|
" # Crea e compila il modello\n",
|
|
" model = create_uv_model(input_shape=X_train.shape[1:])\n",
|
|
" model.compile(\n",
|
|
" optimizer='adam',\n",
|
|
" loss='mse',\n",
|
|
" metrics=['mae', 'mse']\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Mostra il summary del modello\n",
|
|
" model.summary()\n",
|
|
"\n",
|
|
" # Addestra il modello\n",
|
|
" history = model.fit(\n",
|
|
" X_train,\n",
|
|
" y_train,\n",
|
|
" validation_data=(X_val, y_val),\n",
|
|
" **kwargs\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Salva il modello con tutti gli artefatti\n",
|
|
" save_single_model_and_scalers(\n",
|
|
" model=model,\n",
|
|
" model_name='uvindex',\n",
|
|
" scalers=scalers,\n",
|
|
" base_path='./models'\n",
|
|
" )\n",
|
|
"\n",
|
|
" print(\"\\nAddestramento completato e modello salvato!\")\n",
|
|
" return model, history\n",
|
|
"\n",
|
|
"\n",
|
|
"def save_single_model_and_scalers(model, model_name, scalers=None, base_path='./kaggle/working/models'):\n",
|
|
" \"\"\"\n",
|
|
" Salva un singolo modello con tutti i suoi artefatti associati e multipli scaler.\n",
|
|
" \n",
|
|
" Parameters:\n",
|
|
" -----------\n",
|
|
" model : keras.Model\n",
|
|
" Il modello da salvare\n",
|
|
" model_name : str\n",
|
|
" Nome del modello (es. 'solarradiation', 'solarenergy', 'uvindex')\n",
|
|
" scalers : dict, optional\n",
|
|
" Dizionario degli scaler associati al modello (es. {'X': x_scaler, 'y': y_scaler})\n",
|
|
" base_path : str\n",
|
|
" Percorso base dove salvare il modello\n",
|
|
" \"\"\"\n",
|
|
" if isinstance(base_path, list):\n",
|
|
" base_path = './kaggle/working/models'\n",
|
|
"\n",
|
|
" # Crea la cartella base se non esiste\n",
|
|
" os.makedirs(base_path, exist_ok=True)\n",
|
|
"\n",
|
|
" # Crea la sottocartella per il modello specifico\n",
|
|
" model_path = os.path.join(base_path, model_name)\n",
|
|
" os.makedirs(model_path, exist_ok=True)\n",
|
|
"\n",
|
|
" try:\n",
|
|
" print(f\"\\nSalvataggio modello {model_name}...\")\n",
|
|
"\n",
|
|
" # 1. Salva il modello completo\n",
|
|
" model_file = os.path.join(model_path, 'model.keras')\n",
|
|
" model.save(model_file, save_format='keras')\n",
|
|
" print(f\"- Salvato modello completo: {model_file}\")\n",
|
|
"\n",
|
|
" # 2. Salva i pesi separatamente\n",
|
|
" weights_path = os.path.join(model_path, 'weights')\n",
|
|
" os.makedirs(weights_path, exist_ok=True)\n",
|
|
" weight_file = os.path.join(weights_path, 'weights')\n",
|
|
" model.save_weights(weight_file)\n",
|
|
" print(f\"- Salvati pesi: {weight_file}\")\n",
|
|
"\n",
|
|
" # 3. Salva il plot del modello\n",
|
|
" plot_path = os.path.join(model_path, f'{model_name}_architecture.png')\n",
|
|
" tf.keras.utils.plot_model(\n",
|
|
" model,\n",
|
|
" to_file=plot_path,\n",
|
|
" show_shapes=True,\n",
|
|
" show_layer_names=True,\n",
|
|
" rankdir='TB',\n",
|
|
" expand_nested=True,\n",
|
|
" dpi=150\n",
|
|
" )\n",
|
|
" print(f\"- Salvato plot architettura: {plot_path}\")\n",
|
|
"\n",
|
|
" # 4. Salva il summary del modello\n",
|
|
" summary_path = os.path.join(model_path, f'{model_name}_summary.txt')\n",
|
|
" with open(summary_path, 'w') as f:\n",
|
|
" model.summary(print_fn=lambda x: f.write(x + '\\n'))\n",
|
|
" print(f\"- Salvato summary modello: {summary_path}\")\n",
|
|
"\n",
|
|
" # 5. Salva gli scaler se forniti\n",
|
|
" if scalers is not None:\n",
|
|
" scaler_path = os.path.join(model_path, 'scalers')\n",
|
|
" os.makedirs(scaler_path, exist_ok=True)\n",
|
|
"\n",
|
|
" for scaler_name, scaler in scalers.items():\n",
|
|
" scaler_file = os.path.join(scaler_path, f'{scaler_name}_scaler.joblib')\n",
|
|
" joblib.dump(scaler, scaler_file)\n",
|
|
" print(f\"- Salvato scaler {scaler_name}: {scaler_file}\")\n",
|
|
"\n",
|
|
" # 6. Salva la configurazione del modello\n",
|
|
" model_config = {\n",
|
|
" 'has_solar_params': True if model_name == 'solarradiation' else False,\n",
|
|
" 'scalers': list(scalers.keys()) if scalers else []\n",
|
|
" }\n",
|
|
" config_path = os.path.join(model_path, 'model_config.joblib')\n",
|
|
" joblib.dump(model_config, config_path)\n",
|
|
" print(f\"- Salvata configurazione: {config_path}\")\n",
|
|
"\n",
|
|
" # 7. Crea un README specifico per il modello\n",
|
|
" readme_path = os.path.join(model_path, 'README.txt')\n",
|
|
" with open(readme_path, 'w') as f:\n",
|
|
" f.write(f\"{model_name.upper()} Model Artifacts\\n\")\n",
|
|
" f.write(\"=\" * (len(model_name) + 15) + \"\\n\\n\")\n",
|
|
" f.write(\"Directory structure:\\n\")\n",
|
|
" f.write(\"- model.keras: Complete model\\n\")\n",
|
|
" f.write(\"- weights/: Model weights\\n\")\n",
|
|
" f.write(f\"- {model_name}_architecture.png: Visual representation of model architecture\\n\")\n",
|
|
" f.write(f\"- {model_name}_summary.txt: Detailed model summary\\n\")\n",
|
|
" f.write(\"- model_config.joblib: Model configuration\\n\")\n",
|
|
" if scalers:\n",
|
|
" f.write(\"- scalers/: Directory containing model scalers\\n\")\n",
|
|
" for scaler_name in scalers.keys():\n",
|
|
" f.write(f\" - {scaler_name}_scaler.joblib: {scaler_name} scaler\\n\")\n",
|
|
"\n",
|
|
" print(f\"\\nTutti gli artefatti per {model_name} salvati in: {model_path}\")\n",
|
|
" print(f\"Consulta {readme_path} per i dettagli sulla struttura\")\n",
|
|
"\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"Errore nel salvataggio degli artefatti per {model_name}: {str(e)}\")\n",
|
|
" raise\n",
|
|
"\n",
|
|
" return model_path\n",
|
|
"\n",
|
|
"\n",
|
|
"def load_single_model_and_scalers(model_name, base_path='./kaggle/working/models'):\n",
|
|
" \"\"\"\n",
|
|
" Carica un singolo modello con tutti i suoi artefatti e scaler associati.\n",
|
|
" \n",
|
|
" Parameters:\n",
|
|
" -----------\n",
|
|
" model_name : str\n",
|
|
" Nome del modello da caricare (es. 'solarradiation', 'solarenergy', 'uvindex')\n",
|
|
" base_path : str\n",
|
|
" Percorso base dove sono salvati i modelli\n",
|
|
" \n",
|
|
" Returns:\n",
|
|
" --------\n",
|
|
" tuple\n",
|
|
" (model, scalers, model_config)\n",
|
|
" \"\"\"\n",
|
|
" model_path = os.path.join(base_path, model_name)\n",
|
|
"\n",
|
|
" if not os.path.exists(model_path):\n",
|
|
" print(f\"Directory del modello non trovata: {model_path}\")\n",
|
|
" return None, None, None\n",
|
|
"\n",
|
|
" try:\n",
|
|
" print(f\"\\nCaricamento modello {model_name}...\")\n",
|
|
"\n",
|
|
" # 1. Carica la configurazione del modello\n",
|
|
" config_path = os.path.join(model_path, 'model_config.joblib')\n",
|
|
" try:\n",
|
|
" model_config = joblib.load(config_path)\n",
|
|
" print(\"- Configurazione modello caricata\")\n",
|
|
" except:\n",
|
|
" print(\"! Configurazione modello non trovata, usando configurazione di default\")\n",
|
|
" model_config = {\n",
|
|
" 'has_solar_params': True if model_name == 'solarradiation' else False,\n",
|
|
" 'scalers': ['X', 'y']\n",
|
|
" }\n",
|
|
"\n",
|
|
" # 2. Carica il modello\n",
|
|
" try:\n",
|
|
" # Prima prova a caricare il modello completo\n",
|
|
" model_file = os.path.join(model_path, 'model.keras')\n",
|
|
" model = tf.keras.models.load_model(model_file)\n",
|
|
" print(f\"- Modello caricato da: {model_file}\")\n",
|
|
"\n",
|
|
" # Verifica i pesi\n",
|
|
" weights_path = os.path.join(model_path, 'weights', 'weights')\n",
|
|
" if os.path.exists(weights_path + '.index'):\n",
|
|
" model.load_weights(weights_path)\n",
|
|
" print(\"- Pesi verificati con successo\")\n",
|
|
"\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"! Errore nel caricamento del modello: {str(e)}\")\n",
|
|
" print(\"Tentativo di ricostruzione del modello...\")\n",
|
|
"\n",
|
|
" try:\n",
|
|
" # Ricostruzione del modello\n",
|
|
" if model_name == 'solarradiation':\n",
|
|
" model = create_radiation_model(input_shape=(24, 8))\n",
|
|
" elif model_name == 'solarenergy':\n",
|
|
" model = create_energy_model(input_shape=(24, 8))\n",
|
|
" elif model_name == 'uvindex':\n",
|
|
" model = create_uv_model(input_shape=(24, 8))\n",
|
|
" else:\n",
|
|
" raise ValueError(f\"Tipo di modello non riconosciuto: {model_name}\")\n",
|
|
"\n",
|
|
" # Carica i pesi\n",
|
|
" model.load_weights(weights_path)\n",
|
|
" print(\"- Modello ricostruito dai pesi con successo\")\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"! Errore nella ricostruzione del modello: {str(e)}\")\n",
|
|
" return None, None, None\n",
|
|
"\n",
|
|
" # 3. Carica gli scaler\n",
|
|
" scalers = {}\n",
|
|
" scaler_path = os.path.join(model_path, 'scalers')\n",
|
|
" if os.path.exists(scaler_path):\n",
|
|
" print(\"\\nCaricamento scaler:\")\n",
|
|
" for scaler_file in os.listdir(scaler_path):\n",
|
|
" if scaler_file.endswith('_scaler.joblib'):\n",
|
|
" scaler_name = scaler_file.replace('_scaler.joblib', '')\n",
|
|
" scaler_file_path = os.path.join(scaler_path, scaler_file)\n",
|
|
" try:\n",
|
|
" scalers[scaler_name] = joblib.load(scaler_file_path)\n",
|
|
" print(f\"- Caricato scaler {scaler_name}\")\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"! Errore nel caricamento dello scaler {scaler_name}: {str(e)}\")\n",
|
|
" else:\n",
|
|
" print(\"! Directory degli scaler non trovata\")\n",
|
|
"\n",
|
|
" # 4. Verifica integrità del modello\n",
|
|
" try:\n",
|
|
" # Verifica che il modello possa fare predizioni\n",
|
|
" if model_name == 'solarradiation':\n",
|
|
" dummy_input = [np.zeros((1, 24, 8)), np.zeros((1, 3))]\n",
|
|
" else:\n",
|
|
" dummy_input = np.zeros((1, 24, 8))\n",
|
|
"\n",
|
|
" model.predict(dummy_input, verbose=0)\n",
|
|
" print(\"\\n✓ Verifica integrità modello completata con successo\")\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"\\n! Attenzione: il modello potrebbe non funzionare correttamente: {str(e)}\")\n",
|
|
"\n",
|
|
" # 5. Carica e verifica il summary del modello\n",
|
|
" summary_path = os.path.join(model_path, f'{model_name}_summary.txt')\n",
|
|
" if os.path.exists(summary_path):\n",
|
|
" print(\"\\nSummary del modello disponibile in:\", summary_path)\n",
|
|
"\n",
|
|
" # 6. Verifica il plot dell'architettura\n",
|
|
" plot_path = os.path.join(model_path, f'{model_name}_architecture.png')\n",
|
|
" if os.path.exists(plot_path):\n",
|
|
" print(\"Plot dell'architettura disponibile in:\", plot_path)\n",
|
|
"\n",
|
|
" print(f\"\\nCaricamento di {model_name} completato con successo!\")\n",
|
|
" return model, scalers, model_config\n",
|
|
"\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"\\nErrore critico nel caricamento del modello {model_name}: {str(e)}\")\n",
|
|
" return None, None, None"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
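{
"cell_type": "code",
"metadata": {},
"source": [
"# Illustrative sketch (not part of the original pipeline): check the shapes produced by\n",
"# create_sequences and prepare_model_specific_data on made-up random data (200 rows, 8 features).\n",
"rng = np.random.default_rng(random_state_value)\n",
"X_demo = rng.random((200, 8))   # e.g. 200 hourly rows, 8 weather features\n",
"y_demo = rng.random((200, 3))   # 3 targets: solarradiation, solarenergy, uvindex\n",
"\n",
"X_seq, y_seq = create_sequences(24, X_demo, y_demo[:, 0].reshape(-1, 1))\n",
"print(X_seq.shape, y_seq.shape)  # (176, 24, 8) (176, 1)\n",
"\n",
"splits, demo_scaler_y = prepare_model_specific_data(X_demo, y_demo, target_idx=0, timesteps=24)\n",
"for split_name, (Xs, ys) in splits.items():\n",
"    print(split_name, Xs.shape, ys.shape)"
],
"outputs": [],
"execution_count": null
},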
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {},
|
|
"source": [
|
|
"def process_predictions(predictions, scaler, dates, data, target):\n",
|
|
" \"\"\"\n",
|
|
" Processa e salva le predizioni nel DataFrame.\n",
|
|
" \n",
|
|
" Parameters:\n",
|
|
" -----------\n",
|
|
" predictions : array-like\n",
|
|
" Predizioni scalate\n",
|
|
" scaler : object\n",
|
|
" Scaler per denormalizzare le predizioni\n",
|
|
" dates : array-like\n",
|
|
" Date per le predizioni\n",
|
|
" data : pd.DataFrame\n",
|
|
" DataFrame da aggiornare\n",
|
|
" target : str\n",
|
|
" Nome della variabile target\n",
|
|
" \n",
|
|
" Returns:\n",
|
|
" --------\n",
|
|
" array-like\n",
|
|
" Predizioni denormalizzate\n",
|
|
" \"\"\"\n",
|
|
" # Verifica e gestione NaN nelle predizioni\n",
|
|
" if np.isnan(predictions).any():\n",
|
|
" print(\"ATTENZIONE: Trovati NaN nelle predizioni\")\n",
|
|
" predictions = np.nan_to_num(predictions, 0)\n",
|
|
"\n",
|
|
" # Denormalizza e applica vincolo di non negatività\n",
|
|
" y_pred = scaler.inverse_transform(predictions)\n",
|
|
" y_pred = np.maximum(y_pred, 0)\n",
|
|
"\n",
|
|
" # Aggiorna il DataFrame\n",
|
|
" if len(dates) > len(y_pred):\n",
|
|
" dates = dates[:len(y_pred)]\n",
|
|
" data.loc[dates, target] = y_pred\n",
|
|
"\n",
|
|
" # Stampa statistiche\n",
|
|
" print(f\"\\nStatistiche predizioni per {target}:\")\n",
|
|
" print(f\"Media: {np.mean(y_pred):.2f}\")\n",
|
|
" print(f\"Min: {np.min(y_pred):.2f}\")\n",
|
|
" print(f\"Max: {np.max(y_pred):.2f}\")\n",
|
|
"\n",
|
|
" return y_pred\n",
|
|
"\n",
|
|
"\n",
|
|
"def predict_radiation(data, features, model, scalers, timesteps=24):\n",
|
|
" \"\"\"\n",
|
|
" Effettua predizioni per la radiazione solare.\n",
|
|
" \"\"\"\n",
|
|
" print(\"\\nPredizione Radiazione Solare\")\n",
|
|
" print(\"=\" * 50)\n",
|
|
"\n",
|
|
" try:\n",
|
|
" # Prepara features base\n",
|
|
" X_current = scalers['X'].transform(data[features].values)\n",
|
|
" X_seq = create_sequences(timesteps, X_current)\n",
|
|
"\n",
|
|
" # Prepara parametri solari\n",
|
|
" solar_columns = ['solar_angle', 'clear_sky_index', 'solar_elevation']\n",
|
|
" solar_params = data[solar_columns].values\n",
|
|
" solar_params_seq = solar_params[timesteps:len(X_seq) + timesteps]\n",
|
|
"\n",
|
|
" # Effettua predizioni\n",
|
|
" predictions = model.predict(\n",
|
|
" [X_seq, solar_params_seq],\n",
|
|
" batch_size=32,\n",
|
|
" verbose=1\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Processa e salva predizioni\n",
|
|
" dates = data.index[timesteps:]\n",
|
|
" return process_predictions(predictions, scalers['solarradiation'], dates, data, 'solarradiation')\n",
|
|
"\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"Errore nella predizione della radiazione: {str(e)}\")\n",
|
|
" return np.zeros(len(data) - timesteps)\n",
|
|
"\n",
|
|
"\n",
|
|
"def predict_energy(data, features, model, scalers, radiation_pred=None, timesteps=24):\n",
|
|
" \"\"\"\n",
|
|
" Effettua predizioni per l'energia solare.\n",
|
|
" \"\"\"\n",
|
|
" print(\"\\nPredizione Energia Solare\")\n",
|
|
" print(\"=\" * 50)\n",
|
|
"\n",
|
|
" try:\n",
|
|
" # Prepara features base\n",
|
|
" X_current = scalers['X'].transform(data[features].values)\n",
|
|
"\n",
|
|
" # Aggiungi predizioni della radiazione se disponibili\n",
|
|
" if radiation_pred is not None:\n",
|
|
" radiation_scaled = scalers['solarradiation'].transform(radiation_pred.reshape(-1, 1))\n",
|
|
" X_current = np.column_stack([X_current, radiation_scaled])\n",
|
|
"\n",
|
|
" X_seq = create_sequences(timesteps, X_current)\n",
|
|
"\n",
|
|
" # Effettua predizioni\n",
|
|
" predictions = model.predict(X_seq, batch_size=32, verbose=1)\n",
|
|
"\n",
|
|
" # Processa e salva predizioni\n",
|
|
" dates = data.index[timesteps:]\n",
|
|
" return process_predictions(predictions, scalers['solarenergy'], dates, data, 'solarenergy')\n",
|
|
"\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"Errore nella predizione dell'energia: {str(e)}\")\n",
|
|
" return np.zeros(len(data) - timesteps)\n",
|
|
"\n",
|
|
"\n",
|
|
"def predict_uv(data, features, model, scalers, radiation_pred=None, energy_pred=None, timesteps=24):\n",
|
|
" \"\"\"\n",
|
|
" Effettua predizioni per l'indice UV.\n",
|
|
" \"\"\"\n",
|
|
" print(\"\\nPredizione Indice UV\")\n",
|
|
" print(\"=\" * 50)\n",
|
|
"\n",
|
|
" try:\n",
|
|
" # Prepara features base\n",
|
|
" X_current = scalers['X'].transform(data[features].values)\n",
|
|
"\n",
|
|
" # Aggiungi predizioni precedenti se disponibili\n",
|
|
" if radiation_pred is not None:\n",
|
|
" radiation_scaled = scalers['solarradiation'].transform(radiation_pred.reshape(-1, 1))\n",
|
|
" X_current = np.column_stack([X_current, radiation_scaled])\n",
|
|
"\n",
|
|
" if energy_pred is not None:\n",
|
|
" energy_scaled = scalers['solarenergy'].transform(energy_pred.reshape(-1, 1))\n",
|
|
" X_current = np.column_stack([X_current, energy_scaled])\n",
|
|
"\n",
|
|
" X_seq = create_sequences(timesteps, X_current)\n",
|
|
"\n",
|
|
" # Effettua predizioni\n",
|
|
" predictions = model.predict(X_seq, batch_size=32, verbose=1)\n",
|
|
"\n",
|
|
" # Processa e salva predizioni\n",
|
|
" dates = data.index[timesteps:]\n",
|
|
" return process_predictions(predictions, scalers['uvindex'], dates, data, 'uvindex')\n",
|
|
"\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"Errore nella predizione dell'UV: {str(e)}\")\n",
|
|
" return np.zeros(len(data) - timesteps)"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
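{
"cell_type": "code",
"metadata": {},
"source": [
"# Illustrative sketch of how the three predictors are meant to be chained: the radiation\n",
"# predictions feed the energy model and both feed the UV model. 'demo_features' is a\n",
"# placeholder list and 'data_after_2010' is the frame returned by prepare_solar_data; the\n",
"# guard below skips everything unless the trained models and the data are actually available.\n",
"demo_features = ['temp', 'humidity', 'cloudcover', 'visibility',\n",
"                 'solar_angle', 'clear_sky_index', 'solar_elevation', 'day_of_year_sin']  # placeholder\n",
"\n",
"rad_model, rad_scalers, _ = load_single_model_and_scalers('solarradiation')\n",
"energy_model, energy_scalers, _ = load_single_model_and_scalers('solarenergy')\n",
"uv_model, uv_scalers, _ = load_single_model_and_scalers('uvindex')\n",
"\n",
"if all(m is not None for m in (rad_model, energy_model, uv_model)) and 'data_after_2010' in globals():\n",
"    radiation_pred = predict_radiation(data_after_2010, demo_features, rad_model, rad_scalers)\n",
"    energy_pred = predict_energy(data_after_2010, demo_features, energy_model, energy_scalers,\n",
"                                 radiation_pred=radiation_pred)\n",
"    uv_pred = predict_uv(data_after_2010, demo_features, uv_model, uv_scalers,\n",
"                         radiation_pred=radiation_pred, energy_pred=energy_pred)"
],
"outputs": [],
"execution_count": null
},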
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {},
|
|
"source": [
|
|
"def add_olive_water_consumption_correlation(dataset):\n",
|
|
" # Dati simulati per il fabbisogno d'acqua e la correlazione con la temperatura\n",
|
|
" fabbisogno_acqua = {\n",
|
|
" \"Nocellara dell'Etna\": {\"Primavera\": 1200, \"Estate\": 2000, \"Autunno\": 1000, \"Inverno\": 500, \"Temperatura Ottimale\": 18, \"Resistenza\": \"Media\"},\n",
|
|
" \"Leccino\": {\"Primavera\": 1000, \"Estate\": 1800, \"Autunno\": 800, \"Inverno\": 400, \"Temperatura Ottimale\": 20, \"Resistenza\": \"Alta\"},\n",
|
|
" \"Frantoio\": {\"Primavera\": 1100, \"Estate\": 1900, \"Autunno\": 900, \"Inverno\": 450, \"Temperatura Ottimale\": 19, \"Resistenza\": \"Alta\"},\n",
|
|
" \"Coratina\": {\"Primavera\": 1300, \"Estate\": 2200, \"Autunno\": 1100, \"Inverno\": 550, \"Temperatura Ottimale\": 17, \"Resistenza\": \"Media\"},\n",
|
|
" \"Moraiolo\": {\"Primavera\": 1150, \"Estate\": 2100, \"Autunno\": 900, \"Inverno\": 480, \"Temperatura Ottimale\": 18, \"Resistenza\": \"Media\"},\n",
|
|
" \"Pendolino\": {\"Primavera\": 1050, \"Estate\": 1850, \"Autunno\": 850, \"Inverno\": 430, \"Temperatura Ottimale\": 20, \"Resistenza\": \"Alta\"},\n",
|
|
" \"Taggiasca\": {\"Primavera\": 1000, \"Estate\": 1750, \"Autunno\": 800, \"Inverno\": 400, \"Temperatura Ottimale\": 19, \"Resistenza\": \"Alta\"},\n",
|
|
" \"Canino\": {\"Primavera\": 1100, \"Estate\": 1900, \"Autunno\": 900, \"Inverno\": 450, \"Temperatura Ottimale\": 18, \"Resistenza\": \"Media\"},\n",
|
|
" \"Itrana\": {\"Primavera\": 1200, \"Estate\": 2000, \"Autunno\": 1000, \"Inverno\": 500, \"Temperatura Ottimale\": 17, \"Resistenza\": \"Media\"},\n",
|
|
" \"Ogliarola\": {\"Primavera\": 1150, \"Estate\": 1950, \"Autunno\": 900, \"Inverno\": 480, \"Temperatura Ottimale\": 18, \"Resistenza\": \"Media\"},\n",
|
|
" \"Biancolilla\": {\"Primavera\": 1050, \"Estate\": 1800, \"Autunno\": 850, \"Inverno\": 430, \"Temperatura Ottimale\": 19, \"Resistenza\": \"Alta\"}\n",
|
|
" }\n",
|
|
"\n",
|
|
" # Calcola il fabbisogno idrico annuale per ogni varietà\n",
|
|
" for varieta in fabbisogno_acqua:\n",
|
|
" fabbisogno_acqua[varieta][\"Annuale\"] = sum([fabbisogno_acqua[varieta][stagione] for stagione in [\"Primavera\", \"Estate\", \"Autunno\", \"Inverno\"]])\n",
|
|
"\n",
|
|
" # Aggiungiamo le nuove colonne al dataset\n",
|
|
" dataset[\"Fabbisogno Acqua Primavera (m³/ettaro)\"] = dataset[\"Varietà di Olive\"].apply(lambda x: fabbisogno_acqua[x][\"Primavera\"])\n",
|
|
" dataset[\"Fabbisogno Acqua Estate (m³/ettaro)\"] = dataset[\"Varietà di Olive\"].apply(lambda x: fabbisogno_acqua[x][\"Estate\"])\n",
|
|
" dataset[\"Fabbisogno Acqua Autunno (m³/ettaro)\"] = dataset[\"Varietà di Olive\"].apply(lambda x: fabbisogno_acqua[x][\"Autunno\"])\n",
|
|
" dataset[\"Fabbisogno Acqua Inverno (m³/ettaro)\"] = dataset[\"Varietà di Olive\"].apply(lambda x: fabbisogno_acqua[x][\"Inverno\"])\n",
|
|
" dataset[\"Fabbisogno Idrico Annuale (m³/ettaro)\"] = dataset[\"Varietà di Olive\"].apply(lambda x: fabbisogno_acqua[x][\"Annuale\"])\n",
|
|
" dataset[\"Temperatura Ottimale\"] = dataset[\"Varietà di Olive\"].apply(lambda x: fabbisogno_acqua[x][\"Temperatura Ottimale\"])\n",
|
|
" dataset[\"Resistenza alla Siccità\"] = dataset[\"Varietà di Olive\"].apply(lambda x: fabbisogno_acqua[x][\"Resistenza\"])\n",
|
|
"\n",
|
|
" return dataset"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
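{
"cell_type": "code",
"metadata": {},
"source": [
"# Small self-contained check of add_olive_water_consumption_correlation on a made-up frame;\n",
"# the variety names must match the keys of the simulated lookup table above.\n",
"demo_olive_df = pd.DataFrame({\"Varietà di Olive\": [\"Leccino\", \"Coratina\", \"Taggiasca\"]})\n",
"demo_olive_df = add_olive_water_consumption_correlation(demo_olive_df)\n",
"print(demo_olive_df[[\"Varietà di Olive\", \"Fabbisogno Idrico Annuale (m³/ettaro)\", \"Resistenza alla Siccità\"]])"
],
"outputs": [],
"execution_count": null
},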
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "zOeyz5JHthA_"
|
|
},
|
|
"source": [
|
|
"def preprocess_weather_data(weather_df):\n",
|
|
" # Calcola statistiche mensili per ogni anno\n",
|
|
" monthly_weather = weather_df.groupby(['year', 'month']).agg({\n",
|
|
" 'temp': ['mean', 'min', 'max'],\n",
|
|
" 'humidity': 'mean',\n",
|
|
" 'precip': 'sum',\n",
|
|
" 'windspeed': 'mean',\n",
|
|
" 'cloudcover': 'mean',\n",
|
|
" 'solarradiation': 'sum',\n",
|
|
" 'solarenergy': 'sum',\n",
|
|
" 'uvindex': 'max'\n",
|
|
" }).reset_index()\n",
|
|
"\n",
|
|
" monthly_weather.columns = ['year', 'month'] + [f'{col[0]}_{col[1]}' for col in monthly_weather.columns[2:]]\n",
|
|
" return monthly_weather\n",
|
|
"\n",
|
|
"\n",
|
|
"def get_growth_phase(month):\n",
|
|
" if month in [12, 1, 2]:\n",
|
|
" return 'dormancy'\n",
|
|
" elif month in [3, 4, 5]:\n",
|
|
" return 'flowering'\n",
|
|
" elif month in [6, 7, 8]:\n",
|
|
" return 'fruit_set'\n",
|
|
" else:\n",
|
|
" return 'ripening'\n",
|
|
"\n",
|
|
"\n",
|
|
"def calculate_weather_effect(row, optimal_temp):\n",
|
|
" # Effetti base\n",
|
|
" temp_effect = -0.1 * (row['temp_mean'] - optimal_temp) ** 2\n",
|
|
" rain_effect = -0.05 * (row['precip_sum'] - 600) ** 2 / 10000\n",
|
|
" sun_effect = 0.1 * row['solarenergy_sum'] / 1000\n",
|
|
"\n",
|
|
" # Fattori di scala basati sulla fase di crescita\n",
|
|
" if row['growth_phase'] == 'dormancy':\n",
|
|
" temp_scale = 0.5\n",
|
|
" rain_scale = 0.2\n",
|
|
" sun_scale = 0.1\n",
|
|
" elif row['growth_phase'] == 'flowering':\n",
|
|
" temp_scale = 2.0\n",
|
|
" rain_scale = 1.5\n",
|
|
" sun_scale = 1.0\n",
|
|
" elif row['growth_phase'] == 'fruit_set':\n",
|
|
" temp_scale = 1.5\n",
|
|
" rain_scale = 1.0\n",
|
|
" sun_scale = 0.8\n",
|
|
" else: # ripening\n",
|
|
" temp_scale = 1.0\n",
|
|
" rain_scale = 0.5\n",
|
|
" sun_scale = 1.2\n",
|
|
"\n",
|
|
" # Calcolo dell'effetto combinato\n",
|
|
" combined_effect = (\n",
|
|
" temp_scale * temp_effect +\n",
|
|
" rain_scale * rain_effect +\n",
|
|
" sun_scale * sun_effect\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Aggiustamenti specifici per fase\n",
|
|
" if row['growth_phase'] == 'flowering':\n",
|
|
" combined_effect -= 0.5 * max(0, row['precip_sum'] - 50) # Penalità per pioggia eccessiva durante la fioritura\n",
|
|
" elif row['growth_phase'] == 'fruit_set':\n",
|
|
" combined_effect += 0.3 * max(0, row['temp_mean'] - (optimal_temp + 5)) # Bonus per temperature più alte durante la formazione dei frutti\n",
|
|
"\n",
|
|
" return combined_effect\n",
|
|
"\n",
|
|
"\n",
|
|
"def calculate_water_need(weather_data, base_need, optimal_temp):\n",
|
|
" # Calcola il fabbisogno idrico basato su temperatura e precipitazioni\n",
|
|
" temp_factor = 1 + 0.05 * (weather_data['temp_mean'] - optimal_temp) # Aumenta del 5% per ogni grado sopra l'ottimale\n",
|
|
" rain_factor = 1 - 0.001 * weather_data['precip_sum'] # Diminuisce leggermente con l'aumentare delle precipitazioni\n",
|
|
" return base_need * temp_factor * rain_factor\n",
|
|
"\n",
|
|
"\n",
|
|
"def clean_column_name(name):\n",
|
|
" # Rimuove caratteri speciali e spazi, converte in snake_case e abbrevia\n",
|
|
" name = re.sub(r'[^a-zA-Z0-9\\s]', '', name) # Rimuove caratteri speciali\n",
|
|
" name = name.lower().replace(' ', '_') # Converte in snake_case\n",
|
|
"\n",
|
|
" # Abbreviazioni comuni\n",
|
|
" abbreviations = {\n",
|
|
" 'production': 'prod',\n",
|
|
" 'percentage': 'pct',\n",
|
|
" 'hectare': 'ha',\n",
|
|
" 'tonnes': 't',\n",
|
|
" 'litres': 'l',\n",
|
|
" 'minimum': 'min',\n",
|
|
" 'maximum': 'max',\n",
|
|
" 'average': 'avg'\n",
|
|
" }\n",
|
|
"\n",
|
|
" for full, abbr in abbreviations.items():\n",
|
|
" name = name.replace(full, abbr)\n",
|
|
"\n",
|
|
" return name\n",
|
|
"\n",
|
|
"\n",
|
|
"def create_technique_mapping(olive_varieties, mapping_path='./kaggle/working/models/technique_mapping.joblib'):\n",
|
|
" # Estrai tutte le tecniche uniche dal dataset e convertile in lowercase\n",
|
|
" all_techniques = olive_varieties['Tecnica di Coltivazione'].str.lower().unique()\n",
|
|
"\n",
|
|
" # Crea il mapping partendo da 1\n",
|
|
" technique_mapping = {tech: i + 1 for i, tech in enumerate(sorted(all_techniques))}\n",
|
|
"\n",
|
|
" # Salva il mapping\n",
|
|
" os.makedirs(os.path.dirname(mapping_path), exist_ok=True)\n",
|
|
" joblib.dump(technique_mapping, mapping_path)\n",
|
|
"\n",
|
|
" return technique_mapping\n",
|
|
"\n",
|
|
"\n",
|
|
"def encode_techniques(df, mapping_path='./kaggle/working/models/technique_mapping.joblib'):\n",
|
|
" if not os.path.exists(mapping_path):\n",
|
|
" raise FileNotFoundError(f\"Mapping not found at {mapping_path}. Run create_technique_mapping first.\")\n",
|
|
"\n",
|
|
" technique_mapping = joblib.load(mapping_path)\n",
|
|
"\n",
|
|
" # Trova tutte le colonne delle tecniche\n",
|
|
" tech_columns = [col for col in df.columns if col.endswith('_tech')]\n",
|
|
"\n",
|
|
" # Applica il mapping a tutte le colonne delle tecniche\n",
|
|
" for col in tech_columns:\n",
|
|
" df[col] = df[col].str.lower().map(technique_mapping).fillna(0).astype(int)\n",
|
|
"\n",
|
|
" return df\n",
|
|
"\n",
|
|
"\n",
|
|
"def decode_techniques(df, mapping_path='./kaggle/working/models/technique_mapping.joblib'):\n",
|
|
" if not os.path.exists(mapping_path):\n",
|
|
" raise FileNotFoundError(f\"Mapping not found at {mapping_path}\")\n",
|
|
"\n",
|
|
" technique_mapping = joblib.load(mapping_path)\n",
|
|
" reverse_mapping = {v: k for k, v in technique_mapping.items()}\n",
|
|
" reverse_mapping[0] = '' # Aggiungi un mapping per 0 a stringa vuota\n",
|
|
"\n",
|
|
" # Trova tutte le colonne delle tecniche\n",
|
|
" tech_columns = [col for col in df.columns if col.endswith('_tech')]\n",
|
|
"\n",
|
|
" # Applica il reverse mapping a tutte le colonne delle tecniche\n",
|
|
" for col in tech_columns:\n",
|
|
" df[col] = df[col].map(reverse_mapping)\n",
|
|
"\n",
|
|
" return df\n",
|
|
"\n",
|
|
"\n",
|
|
"def decode_single_technique(technique_value, mapping_path='./kaggle/working/models/technique_mapping.joblib'):\n",
|
|
" if not os.path.exists(mapping_path):\n",
|
|
" raise FileNotFoundError(f\"Mapping not found at {mapping_path}\")\n",
|
|
"\n",
|
|
" technique_mapping = joblib.load(mapping_path)\n",
|
|
" reverse_mapping = {v: k for k, v in technique_mapping.items()}\n",
|
|
" reverse_mapping[0] = ''\n",
|
|
"\n",
|
|
" return reverse_mapping.get(technique_value, '')"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
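{
"cell_type": "code",
"metadata": {},
"source": [
"# Esempio illustrativo (schizzo su un DataFrame fittizio): round-trip di codifica/decodifica delle tecniche\n",
"# con le funzioni definite sopra. Presuppone che il mapping sia già stato creato\n",
"# (create_technique_mapping viene invocata durante la simulazione); in caso contrario non fa nulla.\n",
"demo_mapping_path = './kaggle/working/models/technique_mapping.joblib'\n",
"if os.path.exists(demo_mapping_path):\n",
"    demo_df = pd.DataFrame({'nocellara_delletna_tech': ['Tradizionale', 'Intensiva', None]})\n",
"    demo_df = encode_techniques(demo_df)   # stringhe -> codici interi (0 per i valori mancanti)\n",
"    print(demo_df)\n",
"    demo_df = decode_techniques(demo_df)   # codici interi -> stringhe in minuscolo\n",
"    print(demo_df)\n",
"else:\n",
"    print('Mapping delle tecniche non ancora creato: eseguire prima la simulazione.')"
],
"outputs": [],
"execution_count": null
},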
{
|
|
"cell_type": "code",
|
|
"metadata": {},
|
|
"source": [
|
|
"def get_optimal_workers():\n",
|
|
" \"\"\"\n",
|
|
" Calcola il numero ottimale di workers basandosi sulle risorse del sistema.\n",
|
|
" \n",
|
|
" Returns:\n",
|
|
" int: Numero ottimale di workers\n",
|
|
" \"\"\"\n",
|
|
" # Ottiene il numero di CPU logiche (inclusi i thread virtuali)\n",
|
|
" cpu_count = multiprocessing.cpu_count()\n",
|
|
"\n",
|
|
" # Ottiene la memoria totale e disponibile in GB\n",
|
|
" memory = psutil.virtual_memory()\n",
|
|
" total_memory_gb = memory.total / (1024 ** 3)\n",
|
|
" available_memory_gb = memory.available / (1024 ** 3)\n",
|
|
"\n",
|
|
" # Stima della memoria necessaria per worker (esempio: 2GB per worker)\n",
|
|
" memory_per_worker_gb = 2\n",
|
|
"\n",
|
|
" # Calcola il numero massimo di workers basato sulla memoria disponibile\n",
|
|
" max_workers_by_memory = int(available_memory_gb / memory_per_worker_gb)\n",
|
|
"\n",
|
|
" # Usa il minimo tra:\n",
|
|
" # - numero di CPU disponibili - 1 (lascia una CPU libera per il sistema)\n",
|
|
" # - numero massimo di workers basato sulla memoria\n",
|
|
" # - un limite massimo arbitrario (es. 16) per evitare troppo overhead\n",
|
|
" optimal_workers = min(\n",
|
|
" cpu_count - 1,\n",
|
|
" max_workers_by_memory,\n",
|
|
" 32 # limite massimo arbitrario\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Assicura almeno 1 worker\n",
|
|
" return max(1, optimal_workers)\n",
|
|
"\n",
|
|
"\n",
|
|
"def simulate_zone(base_weather, olive_varieties, year, zone, all_varieties, variety_techniques):\n",
|
|
" \"\"\"\n",
|
|
" Simula la produzione di olive per una singola zona.\n",
|
|
" \n",
|
|
" Args:\n",
|
|
" base_weather: DataFrame con dati meteo di base per l'anno selezionato\n",
|
|
" olive_varieties: DataFrame con le informazioni sulle varietà di olive\n",
|
|
" zone: ID della zona\n",
|
|
" all_varieties: Array con tutte le varietà disponibili\n",
|
|
" variety_techniques: Dict con le tecniche disponibili per ogni varietà\n",
|
|
" \n",
|
|
" Returns:\n",
|
|
" Dict con i risultati della simulazione per la zona\n",
|
|
" \"\"\"\n",
|
|
" # Crea una copia dei dati meteo per questa zona specifica\n",
|
|
" zone_weather = base_weather.copy()\n",
|
|
"\n",
|
|
" # Genera variazioni meteorologiche specifiche per questa zona\n",
|
|
" zone_weather['temp_mean'] *= np.random.uniform(0.95, 1.05, len(zone_weather))\n",
|
|
" zone_weather['precip_sum'] *= np.random.uniform(0.9, 1.1, len(zone_weather))\n",
|
|
" zone_weather['solarenergy_sum'] *= np.random.uniform(0.95, 1.05, len(zone_weather))\n",
|
|
"\n",
|
|
" # Genera caratteristiche specifiche della zona\n",
|
|
" num_varieties = np.random.randint(1, 4) # 1-3 varietà per zona\n",
|
|
" selected_varieties = np.random.choice(all_varieties, size=num_varieties, replace=False)\n",
|
|
" hectares = np.random.uniform(1, 10) # Dimensione del terreno\n",
|
|
" percentages = np.random.dirichlet(np.ones(num_varieties)) # Distribuzione delle varietà\n",
|
|
"\n",
|
|
" # Inizializzazione contatori annuali\n",
|
|
" annual_production = 0\n",
|
|
" annual_min_oil = 0\n",
|
|
" annual_max_oil = 0\n",
|
|
" annual_avg_oil = 0\n",
|
|
" annual_water_need = 0\n",
|
|
"\n",
|
|
" # Inizializzazione dizionario dati varietà\n",
|
|
" variety_data = {clean_column_name(variety): {\n",
|
|
" 'tech': '',\n",
|
|
" 'pct': 0,\n",
|
|
" 'prod_t_ha': 0,\n",
|
|
" 'oil_prod_t_ha': 0,\n",
|
|
" 'oil_prod_l_ha': 0,\n",
|
|
" 'min_yield_pct': 0,\n",
|
|
" 'max_yield_pct': 0,\n",
|
|
" 'min_oil_prod_l_ha': 0,\n",
|
|
" 'max_oil_prod_l_ha': 0,\n",
|
|
" 'avg_oil_prod_l_ha': 0,\n",
|
|
" 'l_per_t': 0,\n",
|
|
" 'min_l_per_t': 0,\n",
|
|
" 'max_l_per_t': 0,\n",
|
|
" 'avg_l_per_t': 0,\n",
|
|
" 'olive_prod': 0,\n",
|
|
" 'min_oil_prod': 0,\n",
|
|
" 'max_oil_prod': 0,\n",
|
|
" 'avg_oil_prod': 0,\n",
|
|
" 'water_need': 0\n",
|
|
" } for variety in all_varieties}\n",
|
|
"\n",
|
|
" # Simula produzione per ogni varietà selezionata\n",
|
|
" for i, variety in enumerate(selected_varieties):\n",
|
|
" # Seleziona tecnica di coltivazione casuale per questa varietà\n",
|
|
" technique = np.random.choice(variety_techniques[variety])\n",
|
|
" percentage = percentages[i]\n",
|
|
"\n",
|
|
" # Ottieni informazioni specifiche della varietà\n",
|
|
" variety_info = olive_varieties[\n",
|
|
" (olive_varieties['Varietà di Olive'] == variety) &\n",
|
|
" (olive_varieties['Tecnica di Coltivazione'] == technique)\n",
|
|
" ].iloc[0]\n",
|
|
"\n",
|
|
" # Calcola produzione base con variabilità\n",
" base_production = variety_info['Produzione (tonnellate/ettaro)'] * 1000 * percentage * hectares / 12\n",
|
|
" base_production *= np.random.uniform(0.9, 1.1)\n",
|
|
"\n",
|
|
" # Calcola effetti meteo sulla produzione\n",
|
|
" weather_effect = zone_weather.apply(\n",
|
|
" lambda row: calculate_weather_effect(row, variety_info['Temperatura Ottimale']),\n",
|
|
" axis=1\n",
|
|
" )\n",
|
|
" monthly_production = base_production * (1 + weather_effect / 10000)\n",
|
|
" monthly_production *= np.random.uniform(0.95, 1.05, len(zone_weather))\n",
|
|
"\n",
|
|
" # Calcola produzione annuale per questa varietà\n",
|
|
" annual_variety_production = monthly_production.sum()\n",
|
|
"\n",
|
|
" # Calcola rese di olio con variabilità\n",
|
|
" min_yield_factor = np.random.uniform(0.95, 1.05)\n",
|
|
" max_yield_factor = np.random.uniform(0.95, 1.05)\n",
|
|
" avg_yield_factor = (min_yield_factor + max_yield_factor) / 2\n",
|
|
"\n",
|
|
" min_oil_production = annual_variety_production * variety_info['Min Litri per Tonnellata'] / 1000 * min_yield_factor\n",
|
|
" max_oil_production = annual_variety_production * variety_info['Max Litri per Tonnellata'] / 1000 * max_yield_factor\n",
|
|
" avg_oil_production = annual_variety_production * variety_info['Media Litri per Tonnellata'] / 1000 * avg_yield_factor\n",
|
|
"\n",
|
|
" # Calcola fabbisogno idrico\n",
|
|
" base_water_need = (\n",
|
|
" variety_info['Fabbisogno Acqua Primavera (m³/ettaro)'] +\n",
|
|
" variety_info['Fabbisogno Acqua Estate (m³/ettaro)'] +\n",
|
|
" variety_info['Fabbisogno Acqua Autunno (m³/ettaro)'] +\n",
|
|
" variety_info['Fabbisogno Acqua Inverno (m³/ettaro)']\n",
|
|
" ) / 4\n",
|
|
"\n",
|
|
" monthly_water_need = zone_weather.apply(\n",
|
|
" lambda row: calculate_water_need(row, base_water_need, variety_info['Temperatura Ottimale']),\n",
|
|
" axis=1\n",
|
|
" )\n",
|
|
" monthly_water_need *= np.random.uniform(0.95, 1.05, len(monthly_water_need))\n",
|
|
" annual_variety_water_need = monthly_water_need.sum() * percentage * hectares\n",
|
|
"\n",
|
|
" # Aggiorna totali annuali\n",
|
|
" annual_production += annual_variety_production\n",
|
|
" annual_min_oil += min_oil_production\n",
|
|
" annual_max_oil += max_oil_production\n",
|
|
" annual_avg_oil += avg_oil_production\n",
|
|
" annual_water_need += annual_variety_water_need\n",
|
|
"\n",
|
|
" # Aggiorna dati varietà\n",
|
|
" clean_variety = clean_column_name(variety)\n",
|
|
" variety_data[clean_variety].update({\n",
|
|
" 'tech': clean_column_name(technique),\n",
|
|
" 'pct': percentage,\n",
|
|
" 'prod_t_ha': variety_info['Produzione (tonnellate/ettaro)'] * np.random.uniform(0.95, 1.05),\n",
|
|
" 'oil_prod_t_ha': variety_info['Produzione Olio (tonnellate/ettaro)'] * np.random.uniform(0.95, 1.05),\n",
|
|
" 'oil_prod_l_ha': variety_info['Produzione Olio (litri/ettaro)'] * np.random.uniform(0.95, 1.05),\n",
|
|
" 'min_yield_pct': variety_info['Min % Resa'] * min_yield_factor,\n",
|
|
" 'max_yield_pct': variety_info['Max % Resa'] * max_yield_factor,\n",
|
|
" 'min_oil_prod_l_ha': variety_info['Min Produzione Olio (litri/ettaro)'] * min_yield_factor,\n",
|
|
" 'max_oil_prod_l_ha': variety_info['Max Produzione Olio (litri/ettaro)'] * max_yield_factor,\n",
|
|
" 'avg_oil_prod_l_ha': variety_info['Media Produzione Olio (litri/ettaro)'] * avg_yield_factor,\n",
|
|
" 'l_per_t': variety_info['Litri per Tonnellata'] * np.random.uniform(0.98, 1.02),\n",
|
|
" 'min_l_per_t': variety_info['Min Litri per Tonnellata'] * min_yield_factor,\n",
|
|
" 'max_l_per_t': variety_info['Max Litri per Tonnellata'] * max_yield_factor,\n",
|
|
" 'avg_l_per_t': variety_info['Media Litri per Tonnellata'] * avg_yield_factor,\n",
|
|
" 'olive_prod': annual_variety_production,\n",
|
|
" 'min_oil_prod': min_oil_production,\n",
|
|
" 'max_oil_prod': max_oil_production,\n",
|
|
" 'avg_oil_prod': avg_oil_production,\n",
|
|
" 'water_need': annual_variety_water_need\n",
|
|
" })\n",
|
|
"\n",
|
|
" # Appiattisci i dati delle varietà\n",
|
|
" flattened_variety_data = {\n",
|
|
" f'{variety}_{key}': value\n",
|
|
" for variety, data in variety_data.items()\n",
|
|
" for key, value in data.items()\n",
|
|
" }\n",
|
|
"\n",
|
|
" # Restituisci il risultato della zona\n",
|
|
" return {\n",
|
|
" 'year': year,\n",
|
|
" 'zone_id': zone + 1,\n",
|
|
" 'temp_mean': zone_weather['temp_mean'].mean(),\n",
|
|
" 'precip_sum': zone_weather['precip_sum'].sum(),\n",
|
|
" 'solar_energy_sum': zone_weather['solarenergy_sum'].sum(),\n",
|
|
" 'ha': hectares,\n",
|
|
" 'zone': f\"zone_{zone + 1}\",\n",
|
|
" 'olive_prod': annual_production,\n",
|
|
" 'min_oil_prod': annual_min_oil,\n",
|
|
" 'max_oil_prod': annual_max_oil,\n",
|
|
" 'avg_oil_prod': annual_avg_oil,\n",
|
|
" 'total_water_need': annual_water_need,\n",
|
|
" **flattened_variety_data\n",
|
|
" }\n",
|
|
"\n",
|
|
"\n",
|
|
"def simulate_olive_production_parallel(weather_data, olive_varieties, num_simulations=5,\n",
|
|
" random_seed=None, max_workers=None, batch_size=500,\n",
|
|
" output_path=\"./kaggle/working/data/simulated_data.parquet\"):\n",
|
|
" \"\"\"\n",
|
|
" Versione ottimizzata della simulazione che salva i risultati in un unico file parquet partizionato\n",
|
|
" \n",
|
|
" Parameters:\n",
|
|
" -----------\n",
|
|
" weather_data : DataFrame\n",
|
|
" Dati meteorologici di input\n",
|
|
" olive_varieties : DataFrame\n",
|
|
" Dati sulle varietà di olive\n",
|
|
" num_simulations : int\n",
|
|
" Numero totale di simulazioni da eseguire\n",
|
|
" random_seed : int, optional\n",
|
|
" Seed per la riproducibilità\n",
|
|
" max_workers : int, optional\n",
|
|
" Numero massimo di workers per la parallelizzazione\n",
|
|
" batch_size : int\n",
|
|
" Dimensione di ogni batch di simulazioni\n",
|
|
" output_path : str\n",
|
|
" Percorso del file parquet di output (includerà le partizioni)\n",
|
|
" \"\"\"\n",
|
|
" import os\n",
|
|
" from math import ceil\n",
|
|
"\n",
|
|
" if random_seed is not None:\n",
|
|
" np.random.seed(random_seed)\n",
|
|
"\n",
|
|
" # Preparazione dati\n",
|
|
" create_technique_mapping(olive_varieties)\n",
|
|
" monthly_weather = preprocess_weather_data(weather_data)\n",
|
|
" all_varieties = olive_varieties['Varietà di Olive'].unique()\n",
|
|
" variety_techniques = {\n",
|
|
" variety: olive_varieties[olive_varieties['Varietà di Olive'] == variety]['Tecnica di Coltivazione'].unique()\n",
|
|
" for variety in all_varieties\n",
|
|
" }\n",
|
|
"\n",
|
|
" # Calcolo workers ottimali se non specificati\n",
|
|
" if max_workers is None:\n",
|
|
" max_workers = get_optimal_workers() or 1\n",
|
|
" print(f\"Utilizzando {max_workers} workers basati sulle risorse del sistema\")\n",
|
|
"\n",
|
|
" # Calcolo del numero di batch necessari\n",
|
|
" num_batches = ceil(num_simulations / batch_size)\n",
|
|
" print(f\"Elaborazione di {num_simulations} simulazioni in {num_batches} batch\")\n",
|
|
"\n",
|
|
" # Crea directory parent se non esiste\n",
|
|
" os.makedirs(os.path.dirname(output_path), exist_ok=True)\n",
|
|
"\n",
|
|
" for batch_num in range(num_batches):\n",
|
|
" start_sim = batch_num * batch_size\n",
|
|
" end_sim = min((batch_num + 1) * batch_size, num_simulations)\n",
|
|
" current_batch_size = end_sim - start_sim\n",
|
|
"\n",
|
|
" batch_results = []\n",
|
|
"\n",
|
|
" # Parallelizzazione usando ProcessPoolExecutor\n",
|
|
" with ProcessPoolExecutor(max_workers=max_workers) as executor:\n",
|
|
" with tqdm(total=current_batch_size * current_batch_size,\n",
|
|
" desc=f\"Batch {batch_num + 1}/{num_batches}\") as pbar:\n",
|
|
"\n",
|
|
" future_to_sim_id = {}\n",
|
|
"\n",
|
|
" # Sottometti i lavori per il batch corrente\n",
|
|
" for sim in range(start_sim, end_sim):\n",
|
|
" selected_year = np.random.choice(monthly_weather['year'].unique())\n",
|
|
" base_weather = monthly_weather[monthly_weather['year'] == selected_year].copy()\n",
|
|
" base_weather.loc[:, 'growth_phase'] = base_weather['month'].apply(get_growth_phase)\n",
|
|
"\n",
" for zone in range(current_batch_size):\n",
|
|
" future = executor.submit(\n",
|
|
" simulate_zone,\n",
|
|
" base_weather=base_weather,\n",
|
|
" olive_varieties=olive_varieties,\n",
|
|
" year=selected_year,\n",
|
|
" zone=zone,\n",
|
|
" all_varieties=all_varieties,\n",
|
|
" variety_techniques=variety_techniques\n",
|
|
" )\n",
|
|
" future_to_sim_id[future] = sim + 1\n",
|
|
"\n",
|
|
" # Raccogli i risultati del batch\n",
|
|
" for future in as_completed(future_to_sim_id.keys()):\n",
|
|
" sim_id = future_to_sim_id[future]\n",
|
|
" try:\n",
|
|
" result = future.result()\n",
|
|
" result['simulation_id'] = sim_id\n",
|
|
" result['batch_id'] = batch_num # Aggiungiamo batch_id per il partizionamento\n",
|
|
" batch_results.append(result)\n",
|
|
" pbar.update(1)\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"Errore nella simulazione {sim_id}: {str(e)}\")\n",
|
|
" continue\n",
|
|
"\n",
|
|
" # Converti i risultati del batch in DataFrame\n",
|
|
" batch_df = pd.DataFrame(batch_results)\n",
|
|
"\n",
|
|
" # Salva il batch come partizione del file parquet\n",
|
|
" batch_df.to_parquet(\n",
|
|
" output_path,\n",
|
|
" partition_cols=['batch_id'], # Partiziona per batch_id\n",
|
|
" append=batch_num > 0 # Appendi se non è il primo batch\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Libera memoria\n",
|
|
" del batch_results\n",
|
|
" del batch_df\n",
|
|
"\n",
|
|
" print(f\"Simulazione completata. I dati sono stati salvati in: {output_path}\")\n",
|
|
"\n",
|
|
"\n",
|
|
"# Funzione per visualizzare il mapping delle tecniche\n",
|
|
"def print_technique_mapping(mapping_path='./kaggle/working/models/technique_mapping.joblib'):\n",
|
|
" if not os.path.exists(mapping_path):\n",
|
|
" print(\"Mapping file not found.\")\n",
|
|
" return\n",
|
|
"\n",
|
|
" mapping = joblib.load(mapping_path)\n",
|
|
" print(\"Technique Mapping:\")\n",
|
|
" for technique, code in mapping.items():\n",
|
|
" print(f\"{technique}: {code}\")"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {},
|
|
"source": [
|
|
"def clean_column_names(df):\n",
|
|
" # Funzione per pulire i nomi delle colonne\n",
|
|
" new_columns = []\n",
|
|
" for col in df.columns:\n",
|
|
" # Usa regex per separare le varietà\n",
|
|
" varieties = re.findall(r'([a-z]+)_([a-z_]+)', col)\n",
|
|
" if varieties:\n",
|
|
" new_columns.append(f\"{varieties[0][0]}_{varieties[0][1]}\")\n",
|
|
" else:\n",
|
|
" new_columns.append(col)\n",
|
|
" return new_columns\n",
|
|
"\n",
|
|
"\n",
|
|
"def prepare_comparison_data(simulated_data, olive_varieties):\n",
|
|
" # Pulisci i nomi delle colonne\n",
|
|
" df = simulated_data.copy()\n",
|
|
"\n",
|
|
" df.columns = clean_column_names(df)\n",
|
|
" df = encode_techniques(df)\n",
|
|
"\n",
|
|
" all_varieties = olive_varieties['Varietà di Olive'].unique()\n",
|
|
" varieties = [clean_column_name(variety) for variety in all_varieties]\n",
|
|
" comparison_data = []\n",
|
|
"\n",
|
|
" for variety in varieties:\n",
|
|
" olive_prod_col = next((col for col in df.columns if col.startswith(f'{variety}_') and col.endswith('_olive_prod')), None)\n",
|
|
" oil_prod_col = next((col for col in df.columns if col.startswith(f'{variety}_') and col.endswith('_avg_oil_prod')), None)\n",
|
|
" tech_col = next((col for col in df.columns if col.startswith(f'{variety}_') and col.endswith('_tech')), None)\n",
|
|
" water_need_col = next((col for col in df.columns if col.startswith(f'{variety}_') and col.endswith('_water_need')), None)\n",
|
|
"\n",
|
|
" if olive_prod_col and oil_prod_col and tech_col and water_need_col:\n",
|
|
" variety_data = df[[olive_prod_col, oil_prod_col, tech_col, water_need_col]]\n",
|
|
" variety_data = variety_data[variety_data[tech_col] != 0] # Esclude le righe dove la tecnica è 0\n",
|
|
"\n",
|
|
" if not variety_data.empty:\n",
|
|
" avg_olive_prod = pd.to_numeric(variety_data[olive_prod_col], errors='coerce').mean()\n",
|
|
" avg_oil_prod = pd.to_numeric(variety_data[oil_prod_col], errors='coerce').mean()\n",
|
|
" avg_water_need = pd.to_numeric(variety_data[water_need_col], errors='coerce').mean()\n",
|
|
" efficiency = avg_oil_prod / avg_olive_prod if avg_olive_prod > 0 else 0\n",
|
|
" water_efficiency = avg_oil_prod / avg_water_need if avg_water_need > 0 else 0\n",
|
|
"\n",
|
|
" comparison_data.append({\n",
|
|
" 'Variety': variety,\n",
|
|
" 'Avg Olive Production (kg/ha)': avg_olive_prod,\n",
|
|
" 'Avg Oil Production (L/ha)': avg_oil_prod,\n",
|
|
" 'Avg Water Need (m³/ha)': avg_water_need,\n",
|
|
" 'Oil Efficiency (L/kg)': efficiency,\n",
|
|
" 'Water Efficiency (L oil/m³ water)': water_efficiency\n",
|
|
" })\n",
|
|
"\n",
|
|
" return pd.DataFrame(comparison_data)\n",
|
|
"\n",
|
|
"\n",
|
|
"def plot_variety_comparison(comparison_data, metric):\n",
|
|
" plt.figure(figsize=(12, 6))\n",
|
|
" bars = plt.bar(comparison_data['Variety'], comparison_data[metric])\n",
|
|
" plt.title(f'Comparison of {metric} across Olive Varieties')\n",
|
|
" plt.xlabel('Variety')\n",
|
|
" plt.ylabel(metric)\n",
|
|
" plt.xticks(rotation=45, ha='right')\n",
|
|
"\n",
|
|
" for bar in bars:\n",
|
|
" height = bar.get_height()\n",
|
|
" plt.text(bar.get_x() + bar.get_width() / 2., height,\n",
|
|
" f'{height:.2f}',\n",
|
|
" ha='center', va='bottom')\n",
|
|
"\n",
|
|
" plt.tight_layout()\n",
|
|
" plt.show()\n",
|
|
" save_plot(plt, f'variety_comparison_{metric.lower().replace(\" \", \"_\").replace(\"/\", \"_\").replace(\"(\", \"\").replace(\")\", \"\")}')\n",
|
|
" plt.close()\n",
|
|
"\n",
|
|
"\n",
|
|
"def plot_efficiency_vs_production(comparison_data):\n",
|
|
" plt.figure(figsize=(10, 6))\n",
|
|
"\n",
|
|
" plt.scatter(comparison_data['Avg Olive Production (kg/ha)'],\n",
|
|
" comparison_data['Oil Efficiency (L/kg)'],\n",
|
|
" s=100)\n",
|
|
"\n",
|
|
" for i, row in comparison_data.iterrows():\n",
|
|
" plt.annotate(row['Variety'],\n",
|
|
" (row['Avg Olive Production (kg/ha)'], row['Oil Efficiency (L/kg)']),\n",
|
|
" xytext=(5, 5), textcoords='offset points')\n",
|
|
"\n",
|
|
" plt.title('Oil Efficiency vs Olive Production by Variety')\n",
|
|
" plt.xlabel('Average Olive Production (kg/ha)')\n",
|
|
" plt.ylabel('Oil Efficiency (L oil / kg olives)')\n",
|
|
" plt.tight_layout()\n",
|
|
" save_plot(plt, 'efficiency_vs_production')\n",
|
|
" plt.close()\n",
|
|
"\n",
|
|
"\n",
|
|
"def plot_water_efficiency_vs_production(comparison_data):\n",
|
|
" plt.figure(figsize=(10, 6))\n",
|
|
"\n",
|
|
" plt.scatter(comparison_data['Avg Olive Production (kg/ha)'],\n",
|
|
" comparison_data['Water Efficiency (L oil/m³ water)'],\n",
|
|
" s=100)\n",
|
|
"\n",
|
|
" for i, row in comparison_data.iterrows():\n",
|
|
" plt.annotate(row['Variety'],\n",
|
|
" (row['Avg Olive Production (kg/ha)'], row['Water Efficiency (L oil/m³ water)']),\n",
|
|
" xytext=(5, 5), textcoords='offset points')\n",
|
|
"\n",
|
|
" plt.title('Water Efficiency vs Olive Production by Variety')\n",
|
|
" plt.xlabel('Average Olive Production (kg/ha)')\n",
|
|
" plt.ylabel('Water Efficiency (L oil / m³ water)')\n",
|
|
" plt.tight_layout()\n",
|
|
" plt.show()\n",
|
|
" save_plot(plt, 'water_efficiency_vs_production')\n",
|
|
" plt.close()\n",
|
|
"\n",
|
|
"\n",
|
|
"def plot_water_need_vs_oil_production(comparison_data):\n",
|
|
" plt.figure(figsize=(10, 6))\n",
|
|
"\n",
|
|
" plt.scatter(comparison_data['Avg Water Need (m³/ha)'],\n",
|
|
" comparison_data['Avg Oil Production (L/ha)'],\n",
|
|
" s=100)\n",
|
|
"\n",
|
|
" for i, row in comparison_data.iterrows():\n",
|
|
" plt.annotate(row['Variety'],\n",
|
|
" (row['Avg Water Need (m³/ha)'], row['Avg Oil Production (L/ha)']),\n",
|
|
" xytext=(5, 5), textcoords='offset points')\n",
|
|
"\n",
|
|
" plt.title('Oil Production vs Water Need by Variety')\n",
|
|
" plt.xlabel('Average Water Need (m³/ha)')\n",
|
|
" plt.ylabel('Average Oil Production (L/ha)')\n",
|
|
" plt.tight_layout()\n",
|
|
" plt.show()\n",
|
|
" save_plot(plt, 'water_need_vs_oil_production')\n",
|
|
" plt.close()\n",
|
|
"\n",
|
|
"\n",
|
|
"def analyze_by_technique(simulated_data, olive_varieties):\n",
|
|
" # Pulisci i nomi delle colonne\n",
|
|
" df = simulated_data.copy()\n",
|
|
"\n",
|
|
" df.columns = clean_column_names(df)\n",
|
|
" df = encode_techniques(df)\n",
|
|
" all_varieties = olive_varieties['Varietà di Olive'].unique()\n",
|
|
" varieties = [clean_column_name(variety) for variety in all_varieties]\n",
|
|
"\n",
|
|
" technique_data = []\n",
|
|
"\n",
|
|
" for variety in varieties:\n",
|
|
" olive_prod_col = next((col for col in df.columns if col.startswith(f'{variety}_') and col.endswith('_olive_prod')), None)\n",
|
|
" oil_prod_col = next((col for col in df.columns if col.startswith(f'{variety}_') and col.endswith('_avg_oil_prod')), None)\n",
|
|
" tech_col = next((col for col in df.columns if col.startswith(f'{variety}_') and col.endswith('_tech')), None)\n",
|
|
" water_need_col = next((col for col in df.columns if col.startswith(f'{variety}_') and col.endswith('_water_need')), None)\n",
|
|
"\n",
|
|
" if olive_prod_col and oil_prod_col and tech_col and water_need_col:\n",
|
|
" variety_data = df[[olive_prod_col, oil_prod_col, tech_col, water_need_col]]\n",
|
|
" variety_data = variety_data[variety_data[tech_col] != 0]\n",
|
|
"\n",
|
|
" if not variety_data.empty:\n",
|
|
" for tech in variety_data[tech_col].unique():\n",
|
|
" tech_data = variety_data[variety_data[tech_col] == tech]\n",
|
|
"\n",
|
|
" avg_olive_prod = pd.to_numeric(tech_data[olive_prod_col], errors='coerce').mean()\n",
|
|
" avg_oil_prod = pd.to_numeric(tech_data[oil_prod_col], errors='coerce').mean()\n",
|
|
" avg_water_need = pd.to_numeric(tech_data[water_need_col], errors='coerce').mean()\n",
|
|
"\n",
|
|
" efficiency = avg_oil_prod / avg_olive_prod if avg_olive_prod > 0 else 0\n",
|
|
" water_efficiency = avg_oil_prod / avg_water_need if avg_water_need > 0 else 0\n",
|
|
"\n",
|
|
" technique_data.append({\n",
|
|
" 'Variety': variety,\n",
|
|
" 'Technique': tech,\n",
|
|
" 'Technique String': decode_single_technique(tech),\n",
|
|
" 'Avg Olive Production (kg/ha)': avg_olive_prod,\n",
|
|
" 'Avg Oil Production (L/ha)': avg_oil_prod,\n",
|
|
" 'Avg Water Need (m³/ha)': avg_water_need,\n",
|
|
" 'Oil Efficiency (L/kg)': efficiency,\n",
|
|
" 'Water Efficiency (L oil/m³ water)': water_efficiency\n",
|
|
" })\n",
|
|
"\n",
|
|
" return pd.DataFrame(technique_data)"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {},
|
|
"source": [
|
|
"def get_full_data(simulated_data, olive_varieties):\n",
|
|
" # Assumiamo che simulated_data contenga già tutti i dati necessari\n",
|
|
" # Includiamo solo le colonne rilevanti\n",
|
|
" relevant_columns = ['year', 'temp_mean', 'precip_sum', 'solar_energy_sum', 'ha', 'zone', 'olive_prod']\n",
|
|
"\n",
|
|
" # Aggiungiamo le colonne specifiche per varietà\n",
|
|
" all_varieties = olive_varieties['Varietà di Olive'].unique()\n",
|
|
" varieties = [clean_column_name(variety) for variety in all_varieties]\n",
|
|
" for variety in varieties:\n",
|
|
" relevant_columns.extend([f'{variety}_olive_prod', f'{variety}_tech'])\n",
|
|
"\n",
|
|
" return simulated_data[relevant_columns].copy()\n",
|
|
"\n",
|
|
"\n",
|
|
"def analyze_correlations(full_data, variety):\n",
|
|
" # Filtra i dati per la varietà specifica\n",
|
|
" variety_data = full_data[[col for col in full_data.columns if not col.startswith('_') or col.startswith(f'{variety}_')]]\n",
|
|
"\n",
|
|
" # Rinomina le colonne per chiarezza\n",
|
|
" variety_data = variety_data.rename(columns={\n",
|
|
" f'{variety}_olive_prod': 'olive_production',\n",
|
|
" f'{variety}_tech': 'technique'\n",
|
|
" })\n",
|
|
"\n",
|
|
" # Matrice di correlazione\n",
|
|
" plt.figure(figsize=(12, 10))\n",
|
|
" corr_matrix = variety_data[['temp_mean', 'precip_sum', 'solar_energy_sum', 'olive_production']].corr()\n",
|
|
" sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')\n",
|
|
" plt.title(f'Matrice di Correlazione - {variety}')\n",
|
|
" plt.tight_layout()\n",
|
|
" plt.show()\n",
|
|
" save_plot(plt, f'correlation_matrix_{variety}')\n",
|
|
" plt.close()\n",
|
|
"\n",
|
|
" # Scatter plots\n",
|
|
" fig, axes = plt.subplots(2, 2, figsize=(20, 20))\n",
|
|
" fig.suptitle(f'Relazione tra Fattori Meteorologici e Produzione di Olive - {variety}', fontsize=16)\n",
|
|
"\n",
|
|
" for ax, var in zip(axes.flat, ['temp_mean', 'precip_sum', 'solar_energy_sum', 'ha']):\n",
|
|
" sns.scatterplot(data=variety_data, x=var, y='olive_production', hue='technique', ax=ax)\n",
|
|
" ax.set_title(f'{var.capitalize()} vs Produzione Olive')\n",
|
|
" ax.set_xlabel(var.capitalize())\n",
|
|
" ax.set_ylabel('Produzione Olive (kg/ettaro)')\n",
|
|
"\n",
|
|
" plt.tight_layout()\n",
|
|
" plt.show()\n",
|
|
" save_plot(plt, f'meteorological_factors_{variety}')\n",
|
|
" plt.close()"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "2QXm2B51thBA"
|
|
},
|
|
"source": [
|
|
"def prepare_transformer_data(df, olive_varieties_df):\n",
|
|
" # Crea una copia del DataFrame per evitare modifiche all'originale\n",
|
|
" df = df.copy()\n",
|
|
"\n",
|
|
" # Ordina per zona e anno\n",
|
|
" df = df.sort_values(['zone', 'year'])\n",
|
|
"\n",
|
|
" # Definisci le feature\n",
|
|
" temporal_features = ['temp_mean', 'precip_sum', 'solar_energy_sum']\n",
|
|
" static_features = ['ha'] # Feature statiche base\n",
|
|
" target_features = ['olive_prod', 'min_oil_prod', 'max_oil_prod', 'avg_oil_prod', 'total_water_need']\n",
|
|
"\n",
|
|
" # Ottieni le varietà pulite\n",
|
|
" all_varieties = olive_varieties_df['Varietà di Olive'].unique()\n",
|
|
" varieties = [clean_column_name(variety) for variety in all_varieties]\n",
|
|
"\n",
|
|
" # Crea la struttura delle feature per ogni varietà\n",
|
|
" variety_features = [\n",
|
|
" 'tech', 'pct', 'prod_t_ha', 'oil_prod_t_ha', 'oil_prod_l_ha',\n",
|
|
" 'min_yield_pct', 'max_yield_pct', 'min_oil_prod_l_ha', 'max_oil_prod_l_ha',\n",
|
|
" 'avg_oil_prod_l_ha', 'l_per_t', 'min_l_per_t', 'max_l_per_t', 'avg_l_per_t'\n",
|
|
" ]\n",
|
|
"\n",
|
|
" # Prepara dizionari per le nuove colonne\n",
|
|
" new_columns = {}\n",
|
|
"\n",
|
|
" # Prepara le feature per ogni varietà\n",
|
|
" for variety in varieties:\n",
|
|
" # Feature esistenti\n",
|
|
" for feature in variety_features:\n",
|
|
" col_name = f\"{variety}_{feature}\"\n",
|
|
" if col_name in df.columns:\n",
|
|
" if feature != 'tech': # Non includere la colonna tech direttamente\n",
|
|
" static_features.append(col_name)\n",
|
|
"\n",
|
|
" # Feature binarie per le tecniche di coltivazione\n",
|
|
" for technique in ['tradizionale', 'intensiva', 'superintensiva']:\n",
|
|
" col_name = f\"{variety}_{technique}\"\n",
|
|
" new_columns[col_name] = df[f\"{variety}_tech\"].notna() & (\n",
|
|
" df[f\"{variety}_tech\"].str.lower() == technique\n",
|
|
" ).fillna(False)\n",
|
|
" static_features.append(col_name)\n",
|
|
"\n",
|
|
" # Aggiungi tutte le nuove colonne in una volta sola\n",
|
|
" new_df = pd.concat([df] + [pd.Series(v, name=k) for k, v in new_columns.items()], axis=1)\n",
|
|
"\n",
|
|
" # Ordiniamo per zona e anno per mantenere la continuità temporale\n",
|
|
" df_sorted = new_df.sort_values(['zone', 'year'])\n",
|
|
"\n",
|
|
" # Definiamo la dimensione della finestra temporale\n",
|
|
" window_size = 41\n",
|
|
"\n",
|
|
" # Liste per raccogliere i dati\n",
|
|
" temporal_sequences = []\n",
|
|
" static_features_list = []\n",
|
|
" targets_list = []\n",
|
|
"\n",
|
|
" # Iteriamo per ogni zona\n",
|
|
" for zone in df_sorted['zone'].unique():\n",
|
|
" zone_data = df_sorted[df_sorted['zone'] == zone].reset_index(drop=True)\n",
|
|
"\n",
|
|
" if len(zone_data) >= window_size: # Verifichiamo che ci siano abbastanza dati\n",
|
|
" # Creiamo sequenze temporali scorrevoli\n",
|
|
" for i in range(len(zone_data) - window_size + 1):\n",
|
|
" # Sequenza temporale\n",
|
|
" temporal_window = zone_data.iloc[i:i + window_size][temporal_features].values\n",
|
|
" # Verifichiamo che non ci siano valori NaN\n",
|
|
" if not np.isnan(temporal_window).any():\n",
|
|
" temporal_sequences.append(temporal_window)\n",
|
|
"\n",
|
|
" # Feature statiche (prendiamo quelle dell'ultimo timestep della finestra)\n",
|
|
" static_features_list.append(zone_data.iloc[i + window_size - 1][static_features].values)\n",
|
|
"\n",
|
|
" # Target (prendiamo quelli dell'ultimo timestep della finestra)\n",
|
|
" targets_list.append(zone_data.iloc[i + window_size - 1][target_features].values)\n",
|
|
"\n",
|
|
" # Convertiamo in array numpy\n",
|
|
" X_temporal = np.array(temporal_sequences)\n",
|
|
" X_static = np.array(static_features_list)\n",
|
|
" y = np.array(targets_list)\n",
|
|
"\n",
|
|
" print(f\"Dataset completo - Temporal: {X_temporal.shape}, Static: {X_static.shape}, Target: {y.shape}\")\n",
|
|
"\n",
|
|
" # Split dei dati (usando indici casuali per una migliore distribuzione)\n",
|
|
" indices = np.random.permutation(len(X_temporal))\n",
|
|
" #train_idx = int(len(indices) * 0.7)\n",
|
|
" #val_idx = int(len(indices) * 0.85)\n",
|
|
"\n",
|
|
" train_idx = int(len(indices) * 0.65) # 65% training\n",
|
|
" val_idx = int(len(indices) * 0.85) # 20% validation\n",
|
|
" # Il resto rimane 15% test\n",
|
|
"\n",
|
|
" # Oppure versione con 25% validation:\n",
|
|
" #train_idx = int(len(indices) * 0.60) # 60% training\n",
|
|
" #val_idx = int(len(indices) * 0.85) # 25% validation\n",
|
|
"\n",
|
|
" train_indices = indices[:train_idx]\n",
|
|
" val_indices = indices[train_idx:val_idx]\n",
|
|
" test_indices = indices[val_idx:]\n",
|
|
"\n",
|
|
" # Split dei dati\n",
|
|
" X_temporal_train = X_temporal[train_indices]\n",
|
|
" X_temporal_val = X_temporal[val_indices]\n",
|
|
" X_temporal_test = X_temporal[test_indices]\n",
|
|
"\n",
|
|
" X_static_train = X_static[train_indices]\n",
|
|
" X_static_val = X_static[val_indices]\n",
|
|
" X_static_test = X_static[test_indices]\n",
|
|
"\n",
|
|
" y_train = y[train_indices]\n",
|
|
" y_val = y[val_indices]\n",
|
|
" y_test = y[test_indices]\n",
|
|
"\n",
|
|
" # Standardizzazione\n",
|
|
" scaler_temporal = StandardScaler()\n",
|
|
" scaler_static = StandardScaler()\n",
|
|
" scaler_y = StandardScaler()\n",
|
|
"\n",
|
|
" # Standardizzazione dei dati temporali\n",
" X_temporal_train = scaler_temporal.fit_transform(X_temporal_train.reshape(-1, len(temporal_features))).reshape(X_temporal_train.shape)\n",
|
|
" X_temporal_val = scaler_temporal.transform(X_temporal_val.reshape(-1, len(temporal_features))).reshape(X_temporal_val.shape)\n",
|
|
" X_temporal_test = scaler_temporal.transform(X_temporal_test.reshape(-1, len(temporal_features))).reshape(X_temporal_test.shape)\n",
|
|
"\n",
|
|
" # Standardizzazione dei dati statici\n",
|
|
" X_static_train = scaler_static.fit_transform(X_static_train)\n",
|
|
" X_static_val = scaler_static.transform(X_static_val)\n",
|
|
" X_static_test = scaler_static.transform(X_static_test)\n",
|
|
"\n",
|
|
" # Standardizzazione dei target\n",
|
|
" y_train = scaler_y.fit_transform(y_train)\n",
|
|
" y_val = scaler_y.transform(y_val)\n",
|
|
" y_test = scaler_y.transform(y_test)\n",
|
|
"\n",
|
|
" print(\"\\nShape dopo lo split e standardizzazione:\")\n",
|
|
" print(f\"Train - Temporal: {X_temporal_train.shape}, Static: {X_static_train.shape}, Target: {y_train.shape}\")\n",
|
|
" print(f\"Val - Temporal: {X_temporal_val.shape}, Static: {X_static_val.shape}, Target: {y_val.shape}\")\n",
|
|
" print(f\"Test - Temporal: {X_temporal_test.shape}, Static: {X_static_test.shape}, Target: {y_test.shape}\")\n",
|
|
"\n",
|
|
" # Prepara i dizionari di input\n",
|
|
" train_data = {'temporal': X_temporal_train, 'static': X_static_train}\n",
|
|
" val_data = {'temporal': X_temporal_val, 'static': X_static_val}\n",
|
|
" test_data = {'temporal': X_temporal_test, 'static': X_static_test}\n",
|
|
"\n",
|
|
" base_path = './kaggle/working/models/oil_transformer/'\n",
|
|
"\n",
|
|
" os.makedirs(base_path, exist_ok=True)\n",
|
|
"\n",
|
|
" joblib.dump(scaler_temporal, os.path.join(base_path, 'scaler_temporal.joblib'))\n",
|
|
" joblib.dump(scaler_static, os.path.join(base_path, 'scaler_static.joblib'))\n",
|
|
" joblib.dump(scaler_y, os.path.join(base_path, 'scaler_y.joblib'))\n",
|
|
"\n",
|
|
" return (train_data, y_train), (val_data, y_val), (test_data, y_test), (scaler_temporal, scaler_static, scaler_y)"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Per denormalizzare e calcolare l'errore reale\n",
|
|
"def calculate_real_error(model, test_data, test_targets, scaler_y):\n",
|
|
" # Fare predizioni\n",
|
|
" predictions = model.predict(test_data)\n",
|
|
"\n",
|
|
" # Denormalizzare predizioni e target\n",
|
|
" predictions_real = scaler_y.inverse_transform(predictions)\n",
|
|
" targets_real = scaler_y.inverse_transform(test_targets)\n",
|
|
"\n",
|
|
" # Calcolare errore percentuale per ogni target\n",
" percentage_errors = []\n",
|
|
" absolute_errors = []\n",
|
|
"\n",
|
|
" for i in range(predictions_real.shape[1]):\n",
|
|
" mae = np.mean(np.abs(predictions_real[:, i] - targets_real[:, i]))\n",
|
|
" mape = np.mean(np.abs((predictions_real[:, i] - targets_real[:, i]) / targets_real[:, i])) * 100\n",
|
|
" percentage_errors.append(mape)\n",
|
|
" absolute_errors.append(mae)\n",
|
|
"\n",
|
|
" # Stampa risultati per ogni target\n",
|
|
" target_names = ['olive_prod', 'min_oil_prod', 'max_oil_prod', 'avg_oil_prod', 'total_water_need']\n",
|
|
"\n",
|
|
" print(\"\\nErrori per target:\")\n",
|
|
" print(\"-\" * 50)\n",
|
|
" for i, target in enumerate(target_names):\n",
|
|
" print(f\"{target}:\")\n",
|
|
" print(f\"MAE assoluto: {absolute_errors[i]:.2f}\")\n",
|
|
" print(f\"Errore percentuale medio: {percentage_errors[i]:.2f}%\")\n",
|
|
" print(f\"Precisione: {100 - percentage_errors[i]:.2f}%\")\n",
|
|
" print(\"-\" * 50)\n",
|
|
"\n",
|
|
" return percentage_errors, absolute_errors"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "d_WHC4rJthA8"
|
|
},
|
|
"source": [
|
|
"folder_path = './data/weather'\n",
|
|
"#raw_data = read_json_files(folder_path)\n",
|
|
"#weather_data = create_weather_dataset(raw_data)\n",
|
|
"#weather_data['datetime'] = pd.to_datetime(weather_data['datetime'], errors='coerce')\n",
|
|
"#weather_data['date'] = weather_data['datetime'].dt.date\n",
|
|
"#weather_data = weather_data.dropna(subset=['datetime'])\n",
|
|
"#weather_data['datetime'] = pd.to_datetime(weather_data['datetime'])\n",
|
|
"#weather_data['year'] = weather_data['datetime'].dt.year\n",
|
|
"#weather_data['month'] = weather_data['datetime'].dt.month\n",
|
|
"#weather_data['day'] = weather_data['datetime'].dt.day\n",
|
|
"#weather_data.head()\n",
|
|
"\n",
|
|
"#weather_data.to_parquet('./data/weather_data.parquet')"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "uvIOrixethA9"
|
|
},
|
|
"source": [
|
|
"weather_data = pd.read_parquet('./kaggle/input/olive-oil/weather_data.parquet')\n",
|
|
"\n",
|
|
"features = [\n",
|
|
" 'temp', 'tempmin', 'tempmax', 'humidity', 'cloudcover', 'windspeed', 'pressure', 'visibility',\n",
|
|
" 'hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'day_of_year_sin', 'day_of_year_cos',\n",
|
|
" 'temp_humidity', 'temp_cloudcover', 'visibility_cloudcover', 'clear_sky_factor', 'day_length',\n",
|
|
" 'temp_1h_lag', 'cloudcover_1h_lag', 'humidity_1h_lag', 'temp_rolling_mean_6h',\n",
|
|
" 'cloudcover_rolling_mean_6h'\n",
|
|
" ] + [col for col in weather_data.columns if 'season_' in col or 'time_period_' in col]\n"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 1000
|
|
},
|
|
"id": "7qF_3gVpthA9",
|
|
"jupyter": {
|
|
"is_executing": true
|
|
},
|
|
"outputId": "0de98483-956b-45e2-f9f3-8410f79cd307"
|
|
},
|
|
"source": [
|
|
"training_params = {\n",
|
|
" 'epochs': 100,\n",
|
|
" 'batch_size': 32,\n",
|
|
" 'verbose': 1\n",
|
|
"}\n",
|
|
"\n",
|
|
"X_scaled, scaler_X, y_scaled, scaler_y, data_after_2010 = prepare_solar_data(weather_data, features)\n",
|
|
"\n",
|
|
"radiation_scalers = {\n",
|
|
" 'X': scaler_X,\n",
|
|
" 'y': scaler_y,\n",
|
|
" 'solar_params': solar_params_scaler\n",
|
|
"}\n",
|
|
"\n",
|
|
"radiation_model, radiation_history = train_radiation_model(\n",
|
|
" X_train_radiation,\n",
|
|
" y_train_radiation,\n",
|
|
" X_val_radiation,\n",
|
|
" y_val_radiation,\n",
|
|
" solar_params_train,\n",
|
|
" solar_params_val,\n",
|
|
" scalers=radiation_scalers,\n",
|
|
" **training_params\n",
|
|
")"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "ixAzWupmthA-",
|
|
"outputId": "ee180137-1c9f-4eb1-8866-db1e1b1cb58c"
|
|
},
|
|
"source": [
|
|
"target_variables = ['solarradiation', 'solarenergy', 'uvindex']\n",
|
|
"\n",
|
|
"# Salva tutto direttamente\n",
|
|
"save_models_and_scalers(\n",
|
|
" models=models,\n",
|
|
" scalers=scalers, # Passiamo direttamente il dizionario degli scalers così com'è\n",
|
|
" target_variables=target_variables\n",
|
|
")"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "BlQK-7y7thA-"
|
|
},
|
|
"source": [
|
|
"data_after_2010 = weather_data[weather_data['year'] >= 2010].copy()\n",
|
|
"data_before_2010 = weather_data[weather_data['year'] < 2010].copy()\n",
|
|
"# Previsione delle variabili mancanti per data_before_2010\n",
|
|
"# Prepara data_before_2010\n",
|
|
"data_before_2010 = data_before_2010.sort_values('datetime')\n",
|
|
"data_before_2010.set_index('datetime', inplace=True)\n",
|
|
"\n",
|
|
"data_after_2010 = data_after_2010.sort_values('datetime')\n",
|
|
"data_after_2010.set_index('datetime', inplace=True)\n",
|
|
"\n",
|
|
"# Assicurati che le features non abbiano valori mancanti\n",
|
|
"data_before_2010[features] = data_before_2010[features].ffill()\n",
|
|
"data_before_2010[features] = data_before_2010[features].bfill()"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "r_hFmenDthA-",
|
|
"outputId": "650f8755-f6f6-47b4-fc74-c194dd81bf64"
|
|
},
|
|
"source": [
|
|
"#models, scaler_X, scalers_y, target_variables = load_models_and_scalers()\n",
|
|
"\n",
|
|
"# Effettua predizioni\n",
|
|
"predictions = predict_solar_variables(\n",
|
|
" data_before_2010=data_before_2010,\n",
|
|
" features=features,\n",
|
|
" models=models,\n",
|
|
" scalers=scalers, # dizionario completo degli scalers\n",
|
|
" target_variables=target_variables\n",
|
|
")\n",
|
|
"\n",
|
|
"# Crea dataset completo\n",
|
|
"weather_data_complete = create_complete_dataset(\n",
|
|
" data_before_2010,\n",
|
|
" data_after_2010,\n",
|
|
" predictions\n",
|
|
")\n",
|
|
"\n",
|
|
"# Salva il risultato\n",
|
|
"weather_data_complete.reset_index(inplace=True)\n",
|
|
"weather_data_complete.to_parquet(\n",
|
|
" './kaggle/working/data/weather_data_complete.parquet',\n",
|
|
" index=False\n",
|
|
")"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "IKObKOVEthA-"
|
|
},
|
|
"source": [
|
|
"## 2. Esplorazione dei Dati Meteo"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "Z64O5RD9thA-"
|
|
},
|
|
"source": [
|
|
"weather_data = pd.read_parquet('./kaggle/working/data/weather_data_complete.parquet')"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "f3j3IUvothA-",
|
|
"outputId": "a7f38468-f2f4-491e-eda5-ba6e6b8064ee"
|
|
},
|
|
"source": [
|
|
"# Visualizzazione delle tendenze temporali\n",
|
|
"fig, axes = plt.subplots(6, 1, figsize=(15, 20))\n",
|
|
"weather_data.set_index('date')['temp'].plot(ax=axes[0], title='Temperatura Media Giornaliera')\n",
|
|
"weather_data.set_index('date')['humidity'].plot(ax=axes[1], title='Umidità Media Giornaliera')\n",
|
|
"weather_data.set_index('date')['solarradiation'].plot(ax=axes[2], title='Radiazione Solare Giornaliera')\n",
|
|
"weather_data.set_index('date')['solarenergy'].plot(ax=axes[3], title='Radiazione Solare Giornaliera')\n",
|
|
"weather_data.set_index('date')['uvindex'].plot(ax=axes[4], title='Precipitazioni Giornaliere')\n",
|
|
"weather_data.set_index('date')['precip'].plot(ax=axes[4], title='Precipitazioni Giornaliere')\n",
|
|
"plt.tight_layout()\n",
|
|
"plt.show()\n",
|
|
"save_plot(plt, 'weather_trends')\n",
|
|
"plt.close()"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "DHcEwp3pthA_"
|
|
},
|
|
"source": [
|
|
"## 3. Simulazione dei Dati di Produzione Annuale"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "5oG_nhbMthA_"
|
|
},
|
|
"source": [
|
|
"olive_varieties = pd.read_csv('./kaggle/input/olive-oil/variety_olive_oil_production.csv')\n",
|
|
"\n",
|
|
"olive_varieties = add_olive_water_consumption_correlation(olive_varieties)\n",
|
|
"\n",
|
|
"olive_varieties.to_parquet(\"./kaggle/working/data/olive_varieties.parquet\")"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "Y2IH37lAthA_",
|
|
"outputId": "d14e77c8-a4fb-4328-f6c6-de788bca8188"
|
|
},
|
|
"source": [
|
|
"olive_varieties = pd.read_parquet(\"./kaggle/working/data/olive_varieties.parquet\")\n",
|
|
"\n",
|
|
"weather_data = pd.read_parquet('./kaggle/working/data/weather_data_complete.parquet')\n",
|
|
"\n",
|
|
"simulated_data = simulate_olive_production_parallel(weather_data, olive_varieties, 1000, random_state_value)"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Visualizza il mapping delle tecniche\n",
|
|
"print_technique_mapping()"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "4izJmAsbthA_",
|
|
"outputId": "9f871e9b-c9b5-406d-f482-b925befd9dad"
|
|
},
|
|
"source": [
|
|
"simulated_data = pd.read_parquet(\"./kaggle/working/data/simulated_data.parquet\")\n",
|
|
"\n",
|
|
"# Esecuzione dell'analisi\n",
|
|
"comparison_data = prepare_comparison_data(simulated_data, olive_varieties)\n",
|
|
"\n",
|
|
"# Genera i grafici\n",
|
|
"plot_variety_comparison(comparison_data, 'Avg Olive Production (kg/ha)')\n",
|
|
"plot_variety_comparison(comparison_data, 'Avg Oil Production (L/ha)')\n",
|
|
"plot_variety_comparison(comparison_data, 'Avg Water Need (m³/ha)')\n",
|
|
"plot_variety_comparison(comparison_data, 'Oil Efficiency (L/kg)')\n",
|
|
"plot_variety_comparison(comparison_data, 'Water Efficiency (L oil/m³ water)')\n",
|
|
"plot_efficiency_vs_production(comparison_data)\n",
|
|
"plot_water_efficiency_vs_production(comparison_data)\n",
|
|
"plot_water_need_vs_oil_production(comparison_data)\n",
|
|
"\n",
|
|
"# Analisi per tecnica\n",
|
|
"technique_data = analyze_by_technique(simulated_data, olive_varieties)\n",
|
|
"\n",
|
|
"print(technique_data)\n",
|
|
"\n",
|
|
"# Stampa un sommario statistico\n",
|
|
"print(\"Comparison by Variety:\")\n",
|
|
"print(comparison_data.set_index('Variety'))\n",
|
|
"print(\"\\nBest Varieties by Water Efficiency:\")\n",
|
|
"print(comparison_data.sort_values('Water Efficiency (L oil/m³ water)', ascending=False).head())"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "dwhl4ID_thBA"
|
|
},
|
|
"source": [
|
|
"## 4. Analisi della Relazione tra Meteo e Produzione"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "b28MG3NGthBA",
|
|
"outputId": "ac0759ce-ee6e-49e0-9ddd-a70d01ea18ff"
|
|
},
|
|
"source": [
|
|
"# Uso delle funzioni\n",
|
|
"full_data = get_full_data(simulated_data, olive_varieties)\n",
|
|
"\n",
|
|
"# Assumiamo che 'selected_variety' sia definito altrove nel codice\n",
|
|
"# Per esempio:\n",
|
|
"selected_variety = 'nocellara_delletna'\n",
|
|
"\n",
|
|
"analyze_correlations(full_data, selected_variety)"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "OZQ6hHFLthBA"
|
|
},
|
|
"source": [
|
|
"## 5. Preparazione del Modello di Machine Learning"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "smX8MBhithBA"
|
|
},
|
|
"source": [
|
|
"## Divisione train/validation/test:\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "tupaX2LNthBA",
|
|
"outputId": "0a7968cd-9fef-4873-b834-d6b13fe805be"
|
|
},
|
|
"source": [
|
|
"simulated_data = pd.read_parquet(\"./kaggle/working/data/simulated_data.parquet\")\n",
|
|
"olive_varieties = pd.read_parquet(\"./kaggle/working/data/olive_varieties.parquet\")\n",
|
|
"\n",
|
|
"(train_data, train_targets), (val_data, val_targets), (test_data, test_targets), scalers = prepare_transformer_data(simulated_data, olive_varieties)\n",
|
|
"\n",
|
|
"scaler_temporal, scaler_static, scaler_y = scalers\n",
|
|
"\n",
|
|
"print(\"Temporal data shape:\", train_data['temporal'].shape)\n",
|
|
"print(\"Static data shape:\", train_data['static'].shape)\n",
|
|
"print(\"Target shape:\", train_targets.shape)"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "kE7oohfsthBB"
|
|
},
|
|
"source": [
|
|
"## OliveOilTransformer"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "_l868dFFthBB",
|
|
"outputId": "b67993d4-a49e-4b75-d346-bf7f362f932d"
|
|
},
|
|
"source": [
|
|
"@keras.saving.register_keras_serializable()\n",
|
|
"class DataAugmentation(tf.keras.layers.Layer):\n",
|
|
" \"\"\"Custom layer per l'augmentation dei dati\"\"\"\n",
|
|
"\n",
|
|
" def __init__(self, noise_stddev=0.03, **kwargs):\n",
|
|
" super().__init__(**kwargs)\n",
|
|
" self.noise_stddev = noise_stddev\n",
|
|
"\n",
|
|
" def call(self, inputs, training=None):\n",
|
|
" if training:\n",
|
|
" return inputs + tf.random.normal(\n",
|
|
" shape=tf.shape(inputs),\n",
|
|
" mean=0.0,\n",
|
|
" stddev=self.noise_stddev\n",
|
|
" )\n",
|
|
" return inputs\n",
|
|
"\n",
|
|
" def get_config(self):\n",
|
|
" config = super().get_config()\n",
|
|
" config.update({\"noise_stddev\": self.noise_stddev})\n",
|
|
" return config\n",
|
|
"\n",
|
|
"\n",
|
|
"@keras.saving.register_keras_serializable()\n",
|
|
"class PositionalEncoding(tf.keras.layers.Layer):\n",
|
|
" \"\"\"Custom layer per l'encoding posizionale\"\"\"\n",
|
|
"\n",
|
|
" def __init__(self, d_model, **kwargs):\n",
|
|
" super().__init__(**kwargs)\n",
|
|
" self.d_model = d_model\n",
|
|
"\n",
|
|
" def build(self, input_shape):\n",
|
|
" _, seq_length, _ = input_shape\n",
|
|
"\n",
|
|
" # Crea la matrice di encoding posizionale\n",
" position = tf.range(seq_length, dtype=tf.float32)[:, tf.newaxis]\n",
|
|
" div_term = tf.exp(\n",
|
|
" tf.range(0, self.d_model, 2, dtype=tf.float32) *\n",
|
|
" (-tf.math.log(10000.0) / self.d_model)\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Calcola sin e cos\n",
|
|
" pos_encoding = tf.zeros((1, seq_length, self.d_model))\n",
|
|
" pos_encoding_even = tf.sin(position * div_term)\n",
|
|
" pos_encoding_odd = tf.cos(position * div_term)\n",
|
|
"\n",
|
|
" # Assegna i valori alle posizioni pari e dispari\n",
|
|
" pos_encoding = tf.concat(\n",
|
|
" [tf.expand_dims(pos_encoding_even, -1),\n",
|
|
" tf.expand_dims(pos_encoding_odd, -1)],\n",
|
|
" axis=-1\n",
|
|
" )\n",
|
|
" pos_encoding = tf.reshape(pos_encoding, (1, seq_length, -1))\n",
|
|
" pos_encoding = pos_encoding[:, :, :self.d_model]\n",
|
|
"\n",
|
|
" # Salva l'encoding come peso non trainabile\n",
|
|
" self.pos_encoding = self.add_weight(\n",
|
|
" shape=(1, seq_length, self.d_model),\n",
|
|
" initializer=tf.keras.initializers.Constant(pos_encoding),\n",
|
|
" trainable=False,\n",
|
|
" name='positional_encoding'\n",
|
|
" )\n",
|
|
"\n",
|
|
" super().build(input_shape)\n",
|
|
"\n",
|
|
" def call(self, inputs):\n",
|
|
" # Broadcast l'encoding posizionale sul batch\n",
|
|
" batch_size = tf.shape(inputs)[0]\n",
|
|
" pos_encoding_tiled = tf.tile(self.pos_encoding, [batch_size, 1, 1])\n",
|
|
" return inputs + pos_encoding_tiled\n",
|
|
"\n",
|
|
" def get_config(self):\n",
|
|
" config = super().get_config()\n",
|
|
" config.update({\"d_model\": self.d_model})\n",
|
|
" return config\n",
|
|
"\n",
|
|
"\n",
|
|
"@keras.saving.register_keras_serializable()\n",
|
|
"class WarmUpLearningRateSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):\n",
|
|
" \"\"\"Custom learning rate schedule with linear warmup and exponential decay.\"\"\"\n",
|
|
"\n",
|
|
" def __init__(self, initial_learning_rate=1e-3, warmup_steps=500, decay_steps=5000):\n",
|
|
" super().__init__()\n",
|
|
" self.initial_learning_rate = initial_learning_rate\n",
|
|
" self.warmup_steps = warmup_steps\n",
|
|
" self.decay_steps = decay_steps\n",
|
|
"\n",
|
|
" def __call__(self, step):\n",
|
|
" warmup_pct = tf.cast(step, tf.float32) / self.warmup_steps\n",
|
|
" warmup_lr = self.initial_learning_rate * warmup_pct\n",
|
|
" decay_factor = tf.pow(0.1, tf.cast(step, tf.float32) / self.decay_steps)\n",
|
|
" decayed_lr = self.initial_learning_rate * decay_factor\n",
|
|
" return tf.where(step < self.warmup_steps, warmup_lr, decayed_lr)\n",
|
|
"\n",
|
|
" def get_config(self):\n",
|
|
" return {\n",
|
|
" 'initial_learning_rate': self.initial_learning_rate,\n",
|
|
" 'warmup_steps': self.warmup_steps,\n",
|
|
" 'decay_steps': self.decay_steps\n",
|
|
" }\n",
|
|
"\n",
|
|
"\n",
"def create_olive_oil_transformer(temporal_shape, static_shape, num_outputs,\n",
|
|
" d_model=128, num_heads=8, ff_dim=256,\n",
|
|
" num_transformer_blocks=4, mlp_units=[256, 128, 64],\n",
|
|
" dropout=0.2):\n",
|
|
" \"\"\"\n",
|
|
" Crea un transformer per la predizione della produzione di olio d'oliva.\n",
|
|
" \"\"\"\n",
|
|
" # Input layers\n",
|
|
" temporal_input = tf.keras.layers.Input(shape=temporal_shape, name='temporal')\n",
|
|
" static_input = tf.keras.layers.Input(shape=static_shape, name='static')\n",
|
|
"\n",
|
|
" # === TEMPORAL PATH ===\n",
|
|
" x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(temporal_input)\n",
|
|
" x = DataAugmentation()(x)\n",
|
|
"\n",
|
|
" # Temporal projection\n",
|
|
" x = tf.keras.layers.Dense(\n",
|
|
" d_model // 2,\n",
|
|
" activation='gelu',\n",
|
|
" kernel_regularizer=tf.keras.regularizers.l2(1e-5)\n",
|
|
" )(x)\n",
|
|
" x = tf.keras.layers.Dropout(dropout)(x)\n",
|
|
" x = tf.keras.layers.Dense(\n",
|
|
" d_model,\n",
|
|
" activation='gelu',\n",
|
|
" kernel_regularizer=tf.keras.regularizers.l2(1e-5)\n",
|
|
" )(x)\n",
|
|
"\n",
|
|
" # Positional encoding\n",
|
|
" x = PositionalEncoding(d_model)(x)\n",
|
|
"\n",
|
|
" # Transformer blocks\n",
|
|
" skip_connection = x\n",
|
|
" for _ in range(num_transformer_blocks):\n",
|
|
" # Self-attention\n",
|
|
" attention_output = tf.keras.layers.MultiHeadAttention(\n",
|
|
" num_heads=num_heads,\n",
|
|
" key_dim=d_model // num_heads,\n",
|
|
" value_dim=d_model // num_heads\n",
|
|
" )(x, x)\n",
|
|
" attention_output = tf.keras.layers.Dropout(dropout)(attention_output)\n",
|
|
"\n",
|
|
" # Residual connection con pesi addestrabili\n",
|
|
" residual_weights = tf.keras.layers.Dense(d_model, activation='sigmoid')(x)\n",
|
|
" x = tf.keras.layers.Add()([x, residual_weights * attention_output])\n",
|
|
" x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)\n",
|
|
"\n",
|
|
" # Feed-forward network\n",
|
|
" ffn = tf.keras.layers.Dense(ff_dim, activation=\"gelu\")(x)\n",
|
|
" ffn = tf.keras.layers.Dropout(dropout)(ffn)\n",
|
|
" ffn = tf.keras.layers.Dense(d_model)(ffn)\n",
|
|
" ffn = tf.keras.layers.Dropout(dropout)(ffn)\n",
|
|
"\n",
|
|
" # Second residual connection\n",
|
|
" x = tf.keras.layers.Add()([x, ffn])\n",
|
|
" x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)\n",
|
|
"\n",
|
|
" # Add final skip connection\n",
|
|
" x = tf.keras.layers.Add()([x, skip_connection])\n",
|
|
"\n",
|
|
" # Temporal pooling\n",
|
|
" attention_pooled = tf.keras.layers.MultiHeadAttention(\n",
|
|
" num_heads=num_heads,\n",
|
|
" key_dim=d_model // 4\n",
|
|
" )(x, x)\n",
|
|
" attention_pooled = tf.keras.layers.GlobalAveragePooling1D()(attention_pooled)\n",
|
|
"\n",
|
|
" # Additional pooling operations\n",
|
|
" avg_pooled = tf.keras.layers.GlobalAveragePooling1D()(x)\n",
|
|
" max_pooled = tf.keras.layers.GlobalMaxPooling1D()(x)\n",
|
|
"\n",
|
|
" # Combine pooling results\n",
|
|
" temporal_features = tf.keras.layers.Concatenate()(\n",
|
|
" [attention_pooled, avg_pooled, max_pooled]\n",
|
|
" )\n",
|
|
"\n",
|
|
" # === STATIC PATH ===\n",
|
|
" static_features = tf.keras.layers.LayerNormalization(epsilon=1e-6)(static_input)\n",
|
|
" for units in [256, 128, 64]:\n",
|
|
" static_features = tf.keras.layers.Dense(\n",
|
|
" units,\n",
|
|
" activation='gelu',\n",
|
|
" kernel_regularizer=tf.keras.regularizers.l2(1e-5)\n",
|
|
" )(static_features)\n",
|
|
" static_features = tf.keras.layers.Dropout(dropout)(static_features)\n",
|
|
"\n",
|
|
" # === FEATURE FUSION ===\n",
|
|
" combined = tf.keras.layers.Concatenate()([temporal_features, static_features])\n",
|
|
"\n",
|
|
" # === MLP HEAD ===\n",
|
|
" x = combined\n",
|
|
" for units in mlp_units:\n",
|
|
" x = tf.keras.layers.BatchNormalization()(x)\n",
|
|
" x = tf.keras.layers.Dense(\n",
|
|
" units,\n",
|
|
" activation=\"gelu\",\n",
|
|
" kernel_regularizer=tf.keras.regularizers.l2(1e-5)\n",
|
|
" )(x)\n",
|
|
" x = tf.keras.layers.Dropout(dropout)(x)\n",
|
|
"\n",
|
|
" # Output layer\n",
|
|
" outputs = tf.keras.layers.Dense(\n",
|
|
" num_outputs,\n",
|
|
" activation='linear',\n",
|
|
" kernel_regularizer=tf.keras.regularizers.l2(1e-5)\n",
|
|
" )(x)\n",
|
|
"\n",
|
|
" # Create model\n",
|
|
" model = tf.keras.Model(\n",
|
|
" inputs={'temporal': temporal_input, 'static': static_input},\n",
|
|
" outputs=outputs,\n",
|
|
" name='OilTransformer'\n",
|
|
" )\n",
|
|
"\n",
|
|
" return model\n",
|
|
"\n",
|
|
"\n",
|
|
"def create_transformer_callbacks(target_names, val_data, val_targets):\n",
|
|
" \"\"\"\n",
|
|
" Crea i callbacks per il training del modello.\n",
|
|
" \n",
|
|
" Parameters:\n",
|
|
" -----------\n",
|
|
" target_names : list\n",
|
|
" Lista dei nomi dei target per il monitoraggio specifico\n",
|
|
" val_data : dict\n",
|
|
" Dati di validazione\n",
|
|
" val_targets : array\n",
|
|
" Target di validazione\n",
|
|
" \n",
|
|
" Returns:\n",
|
|
" --------\n",
|
|
" list\n",
|
|
" Lista dei callbacks configurati\n",
|
|
" \"\"\"\n",
|
|
"\n",
|
|
" # Custom Metric per target specifici\n",
|
|
" class TargetSpecificMetric(tf.keras.callbacks.Callback):\n",
|
|
" def __init__(self, validation_data, target_names):\n",
|
|
" super().__init__()\n",
|
|
" self.validation_data = validation_data\n",
|
|
" self.target_names = target_names\n",
|
|
"\n",
|
|
" def on_epoch_end(self, epoch, logs={}):\n",
|
|
" x_val, y_val = self.validation_data\n",
|
|
" y_pred = self.model.predict(x_val, verbose=0)\n",
|
|
"\n",
|
|
" for i, name in enumerate(self.target_names):\n",
|
|
" mae = np.mean(np.abs(y_val[:, i] - y_pred[:, i]))\n",
|
|
" logs[f'val_{name}_mae'] = mae\n",
|
|
"\n",
|
|
" # Crea le cartelle per i checkpoint e i log se non esistono\n",
|
|
" os.makedirs('./kaggle/working/models/oil_transformer/checkpoints', exist_ok=True)\n",
|
|
" os.makedirs('./kaggle/working/models/oil_transformer/logs', exist_ok=True)\n",
|
|
"\n",
|
|
" callbacks = [\n",
|
|
" # Early Stopping\n",
|
|
" tf.keras.callbacks.EarlyStopping(\n",
|
|
" monitor='val_loss',\n",
|
|
" patience=20,\n",
|
|
" restore_best_weights=True,\n",
|
|
" min_delta=0.0005,\n",
|
|
" mode='min'\n",
|
|
" ),\n",
|
|
"\n",
|
|
" # Model Checkpoint\n",
|
|
" tf.keras.callbacks.ModelCheckpoint(\n",
|
|
" filepath='./kaggle/working/models/oil_transformer/checkpoints/model_{epoch:02d}_{val_loss:.4f}.h5',\n",
|
|
" monitor='val_loss',\n",
|
|
" save_best_only=True,\n",
|
|
" mode='min',\n",
|
|
" save_weights_only=True\n",
|
|
" ),\n",
|
|
"\n",
|
|
" # Metric per target specifici\n",
|
|
" TargetSpecificMetric(\n",
|
|
" validation_data=(val_data, val_targets),\n",
|
|
" target_names=target_names\n",
|
|
" ),\n",
|
|
"\n",
|
|
" # Reduce LR on Plateau\n",
|
|
" tf.keras.callbacks.ReduceLROnPlateau(\n",
|
|
" monitor='val_loss',\n",
|
|
" factor=0.5,\n",
|
|
" patience=10,\n",
|
|
" min_lr=1e-6,\n",
|
|
" verbose=1\n",
|
|
" ),\n",
|
|
"\n",
|
|
" # TensorBoard logging\n",
|
|
" tf.keras.callbacks.TensorBoard(\n",
|
|
" log_dir='./kaggle/working/models/oil_transformer/logs',\n",
|
|
" histogram_freq=1,\n",
|
|
" write_graph=True,\n",
|
|
" update_freq='epoch'\n",
|
|
" )\n",
|
|
" ]\n",
|
|
"\n",
|
|
" return callbacks\n",
|
|
"\n",
|
|
"\n",
|
|
"def compile_model(model, learning_rate=1e-3):\n",
|
|
" \"\"\"\n",
|
|
" Compila il modello con le impostazioni standard.\n",
|
|
" \"\"\"\n",
|
|
" lr_schedule = WarmUpLearningRateSchedule(\n",
|
|
" initial_learning_rate=learning_rate,\n",
|
|
" warmup_steps=500,\n",
|
|
" decay_steps=5000\n",
|
|
" )\n",
|
|
"\n",
|
|
" model.compile(\n",
|
|
" optimizer=tf.keras.optimizers.AdamW(\n",
|
|
" learning_rate=lr_schedule,\n",
|
|
" weight_decay=0.01\n",
|
|
" ),\n",
|
|
" loss=tf.keras.losses.Huber(),\n",
|
|
" metrics=['mae']\n",
|
|
" )\n",
|
|
"\n",
|
|
" return model\n",
|
|
"\n",
|
|
"\n",
|
|
"def setup_transformer_training(train_data, train_targets, val_data, val_targets):\n",
|
|
" \"\"\"\n",
|
|
" Configura e prepara il transformer con dimensioni dinamiche basate sui dati.\n",
|
|
" \"\"\"\n",
|
|
" # Estrai le shape dai dati\n",
|
|
" temporal_shape = (train_data['temporal'].shape[1], train_data['temporal'].shape[2])\n",
|
|
" static_shape = (train_data['static'].shape[1],)\n",
|
|
" num_outputs = train_targets.shape[1]\n",
|
|
"\n",
|
|
" print(f\"Shape rilevate:\")\n",
|
|
" print(f\"- Temporal shape: {temporal_shape}\")\n",
|
|
" print(f\"- Static shape: {static_shape}\")\n",
|
|
" print(f\"- Numero di output: {num_outputs}\")\n",
|
|
"\n",
|
|
" # Target names basati sul numero di output\n",
|
|
" target_names = ['olive_prod', 'min_oil_prod', 'max_oil_prod', 'avg_oil_prod', 'total_water_need']\n",
|
|
"\n",
|
|
" # Assicurati che il numero di target names corrisponda al numero di output\n",
|
|
" assert len(target_names) == num_outputs, \\\n",
|
|
" f\"Il numero di target names ({len(target_names)}) non corrisponde al numero di output ({num_outputs})\"\n",
|
|
"\n",
|
|
" # Crea il modello con le dimensioni rilevate\n",
|
|
" model = create_olive_oil_transformer(\n",
|
|
" temporal_shape=temporal_shape,\n",
|
|
" static_shape=static_shape,\n",
|
|
" num_outputs=num_outputs\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Compila il modello\n",
|
|
" model = compile_model(model)\n",
|
|
"\n",
|
|
" # Crea i callbacks\n",
|
|
" callbacks = create_transformer_callbacks(target_names, val_data, val_targets)\n",
|
|
"\n",
|
|
" return model, callbacks, target_names\n",
|
|
"\n",
|
|
"\n",
|
|
"def train_transformer(train_data, train_targets, val_data, val_targets, epochs=150, batch_size=64, save_name='final_model'):\n",
|
|
" \"\"\"\n",
|
|
" Funzione principale per l'addestramento del transformer.\n",
|
|
" \"\"\"\n",
|
|
" # Setup del modello\n",
|
|
" model, callbacks, target_names = setup_transformer_training(\n",
|
|
" train_data, train_targets, val_data, val_targets\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Mostra il summary del modello\n",
|
|
" model.summary()\n",
|
|
" os.makedirs(f\"./kaggle/working/models/oil_transformer/\", exist_ok=True)\n",
|
|
" keras.utils.plot_model(model, f\"./kaggle/working/models/oil_transformer/{save_name}.png\", show_shapes=True)\n",
|
|
"\n",
|
|
" # Training\n",
|
|
" history = model.fit(\n",
|
|
" x=train_data,\n",
|
|
" y=train_targets,\n",
|
|
" validation_data=(val_data, val_targets),\n",
|
|
" epochs=epochs,\n",
|
|
" batch_size=batch_size,\n",
|
|
" callbacks=callbacks,\n",
|
|
" verbose=1,\n",
|
|
" shuffle=True\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Salva il modello finale\n",
|
|
" save_path = f'./kaggle/working/models/oil_transformer/{save_name}.keras'\n",
|
|
" model.save(save_path, save_format='keras')\n",
|
|
"\n",
|
|
" os.makedirs(f'./kaggle/working/models/oil_transformer/weights/', exist_ok=True)\n",
|
|
" model.save_weights(f'./kaggle/working/models/oil_transformer/weights')\n",
|
|
" print(f\"\\nModello salvato in: {save_path}\")\n",
|
|
"\n",
|
|
" return model, history"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
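  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The training utilities above infer every dimension from the `train_data` dictionary and the `train_targets` array. As a quick, self-contained illustration, the optional cell below builds dummy arrays with made-up sizes (the real sample, timestep and feature counts come from the preprocessing earlier in the notebook) just to document the layout the functions expect."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Minimal sketch with made-up sizes: 32 samples, 36 timesteps, 10 weather features,\n",
    "# 8 static features and the 5 targets used throughout this notebook.\n",
    "_n, _t, _f, _s = 32, 36, 10, 8\n",
    "_dummy_data = {\n",
    "    'temporal': np.random.rand(_n, _t, _f).astype('float32'),\n",
    "    'static': np.random.rand(_n, _s).astype('float32')\n",
    "}\n",
    "_dummy_targets = np.random.rand(_n, 5).astype('float32')\n",
    "\n",
    "print('temporal:', _dummy_data['temporal'].shape)  # (samples, timesteps, features)\n",
    "print('static:', _dummy_data['static'].shape)      # (samples, static features)\n",
    "print('targets:', _dummy_targets.shape)            # (samples, 5 targets)"
   ],
   "outputs": [],
   "execution_count": null
  },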
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "aytSjU1UthBB"
   },
   "source": [
    "## Model Training"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "xE3iTWonthBB",
    "outputId": "a784254e-deea-4fd3-8578-6a0dbbd45bd7"
   },
   "source": [
    "model, history = train_transformer(train_data, train_targets, val_data, val_targets)"
   ],
   "outputs": [],
   "execution_count": null
  },
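  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick look at the training curves helps spot over- or underfitting. The cell below is an optional sketch that plots the Huber loss and the MAE recorded in the `history` object returned by `train_transformer` above (it assumes the previous training cell has been run)."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Plot training vs. validation curves from the History object\n",
    "hist = history.history\n",
    "\n",
    "fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n",
    "axes[0].plot(hist['loss'], label='train')\n",
    "axes[0].plot(hist['val_loss'], label='validation')\n",
    "axes[0].set_title('Huber loss')\n",
    "axes[0].set_xlabel('epoch')\n",
    "axes[0].legend()\n",
    "\n",
    "# 'mae' / 'val_mae' are present because the model is compiled with metrics=['mae']\n",
    "if 'mae' in hist:\n",
    "    axes[1].plot(hist['mae'], label='train')\n",
    "    axes[1].plot(hist['val_mae'], label='validation')\n",
    "    axes[1].set_title('MAE')\n",
    "    axes[1].set_xlabel('epoch')\n",
    "    axes[1].legend()\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ],
   "outputs": [],
   "execution_count": null
  },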
  {
   "cell_type": "code",
   "metadata": {
    "id": "hPPbvFYmthBB",
    "outputId": "e6570501-00e1-4dde-81e2-4712652a46b3"
   },
   "source": [
    "# Compute the errors on the original (unscaled) scale\n",
    "percentage_errors, absolute_errors = calculate_real_error(model, val_data, val_targets, scaler_y)"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "def evaluate_model_performance(model, data, targets, set_name=\"\"):\n",
    "    \"\"\"\n",
    "    Evaluate the model's performance on a specific dataset.\n",
    "    \"\"\"\n",
    "    predictions = model.predict(data, verbose=0)\n",
    "\n",
    "    target_names = ['olive_prod', 'min_oil_prod', 'max_oil_prod', 'avg_oil_prod', 'total_water_need']\n",
    "    metrics = {}\n",
    "\n",
    "    for i, name in enumerate(target_names):\n",
    "        mae = np.mean(np.abs(targets[:, i] - predictions[:, i]))\n",
    "        mse = np.mean(np.square(targets[:, i] - predictions[:, i]))\n",
    "        rmse = np.sqrt(mse)\n",
    "        mape = np.mean(np.abs((targets[:, i] - predictions[:, i]) / (targets[:, i] + 1e-7))) * 100\n",
    "\n",
    "        metrics[f\"{name}_mae\"] = mae\n",
    "        metrics[f\"{name}_rmse\"] = rmse\n",
    "        metrics[f\"{name}_mape\"] = mape\n",
    "\n",
    "    if set_name:\n",
    "        print(f\"\\nPerformance on the {set_name} set:\")\n",
    "        for metric, value in metrics.items():\n",
    "            print(f\"{metric}: {value:.4f}\")\n",
    "\n",
    "    return metrics\n",
    "\n",
    "\n",
"def retrain_model(base_model, train_data, train_targets,\n",
|
|
" val_data, val_targets,\n",
|
|
" test_data, test_targets,\n",
|
|
" epochs=50, batch_size=128):\n",
|
|
" \"\"\"\n",
|
|
" Implementa il retraining del modello con i dati combinati.\n",
|
|
" \"\"\"\n",
|
|
" print(\"Valutazione performance iniziali del modello...\")\n",
|
|
" initial_metrics = {\n",
|
|
" 'train': evaluate_model_performance(base_model, train_data, train_targets, \"training\"),\n",
|
|
" 'val': evaluate_model_performance(base_model, val_data, val_targets, \"validazione\"),\n",
|
|
" 'test': evaluate_model_performance(base_model, test_data, test_targets, \"test\")\n",
|
|
" }\n",
|
|
"\n",
|
|
" # Combina i dati per il retraining\n",
|
|
" combined_data = {\n",
|
|
" 'temporal': np.concatenate([train_data['temporal'], val_data['temporal'], test_data['temporal']]),\n",
|
|
" 'static': np.concatenate([train_data['static'], val_data['static'], test_data['static']])\n",
|
|
" }\n",
|
|
" combined_targets = np.concatenate([train_targets, val_targets, test_targets])\n",
|
|
"\n",
|
|
" # Crea una nuova suddivisione per la validazione\n",
|
|
" indices = np.arange(len(combined_targets))\n",
|
|
" np.random.shuffle(indices)\n",
|
|
"\n",
|
|
" split_idx = int(len(indices) * 0.9)\n",
|
|
" train_idx, val_idx = indices[:split_idx], indices[split_idx:]\n",
|
|
"\n",
|
|
" # Prepara i dati per il retraining\n",
|
|
" retrain_data = {k: v[train_idx] for k, v in combined_data.items()}\n",
|
|
" retrain_targets = combined_targets[train_idx]\n",
|
|
" retrain_val_data = {k: v[val_idx] for k, v in combined_data.items()}\n",
|
|
" retrain_val_targets = combined_targets[val_idx]\n",
|
|
"\n",
|
|
" checkpoint_path = './kaggle/working/models/oil_transformer/retrain_checkpoints'\n",
|
|
" os.makedirs(checkpoint_path, exist_ok=True)\n",
|
|
"\n",
|
|
" # Configura callbacks\n",
|
|
" callbacks = [\n",
|
|
" tf.keras.callbacks.EarlyStopping(\n",
|
|
" monitor='val_loss',\n",
|
|
" patience=10,\n",
|
|
" restore_best_weights=True,\n",
|
|
" min_delta=0.0001\n",
|
|
" ),\n",
|
|
" tf.keras.callbacks.ReduceLROnPlateau(\n",
|
|
" monitor='val_loss',\n",
|
|
" factor=0.2,\n",
|
|
" patience=5,\n",
|
|
" min_lr=1e-6,\n",
|
|
" verbose=1\n",
|
|
" ),\n",
|
|
" tf.keras.callbacks.ModelCheckpoint(\n",
|
|
" filepath=os.path.join(checkpoint_path, 'model_{epoch:02d}_{val_loss:.4f}.keras'),\n",
|
|
" monitor='val_loss',\n",
|
|
" save_best_only=True,\n",
|
|
" mode='min',\n",
|
|
" save_weights_only=True\n",
|
|
" )\n",
|
|
" ]\n",
|
|
"\n",
|
|
" # Imposta learning rate per il fine-tuning\n",
|
|
" optimizer = tf.keras.optimizers.AdamW(\n",
|
|
" learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(\n",
|
|
" initial_learning_rate=1e-4,\n",
|
|
" decay_steps=1000,\n",
|
|
" decay_rate=0.9\n",
|
|
" ),\n",
|
|
" weight_decay=0.01\n",
|
|
" )\n",
|
|
"\n",
|
|
" # Ricompila il modello con il nuovo optimizer\n",
|
|
" base_model.compile(\n",
|
|
" optimizer=optimizer,\n",
|
|
" loss=tf.keras.losses.Huber(),\n",
|
|
" metrics=['mae']\n",
|
|
" )\n",
|
|
"\n",
|
|
" print(\"\\nAvvio retraining...\")\n",
|
|
" history = base_model.fit(\n",
|
|
" retrain_data,\n",
|
|
" retrain_targets,\n",
|
|
" validation_data=(retrain_val_data, retrain_val_targets),\n",
|
|
" epochs=epochs,\n",
|
|
" batch_size=batch_size,\n",
|
|
" callbacks=callbacks,\n",
|
|
" verbose=1\n",
|
|
" )\n",
|
|
"\n",
|
|
" print(\"\\nValutazione performance finali...\")\n",
|
|
" final_metrics = {\n",
|
|
" 'train': evaluate_model_performance(base_model, train_data, train_targets, \"training\"),\n",
|
|
" 'val': evaluate_model_performance(base_model, val_data, val_targets, \"validazione\"),\n",
|
|
" 'test': evaluate_model_performance(base_model, test_data, test_targets, \"test\")\n",
|
|
" }\n",
|
|
"\n",
|
|
" # Salva il modello finale\n",
|
|
" save_path = './kaggle/working/models/oil_transformer/retrained_model.keras'\n",
|
|
" os.makedirs(os.path.dirname(save_path), exist_ok=True)\n",
|
|
" base_model.save(save_path, save_format='keras')\n",
|
|
" print(f\"\\nModello riaddestrato salvato in: {save_path}\")\n",
|
|
"\n",
|
|
" # Report miglioramenti\n",
|
|
" print(\"\\nMiglioramenti delle performance:\")\n",
|
|
" for dataset in ['train', 'val', 'test']:\n",
|
|
" print(f\"\\nSet {dataset}:\")\n",
|
|
" for metric in initial_metrics[dataset].keys():\n",
|
|
" initial = initial_metrics[dataset][metric]\n",
|
|
" final = final_metrics[dataset][metric]\n",
|
|
" improvement = ((initial - final) / initial) * 100\n",
|
|
" print(f\"{metric}: {improvement:.2f}% di miglioramento\")\n",
|
|
"\n",
|
|
" return base_model, history, final_metrics\n",
|
|
"\n",
|
|
"\n",
|
|
"def start_retraining(model_path, train_data, train_targets,\n",
|
|
" val_data, val_targets,\n",
|
|
" test_data, test_targets,\n",
|
|
" epochs=50, batch_size=128):\n",
|
|
" \"\"\"\n",
|
|
" Avvia il processo di retraining in modo sicuro.\n",
|
|
" \"\"\"\n",
|
|
" try:\n",
|
|
" print(\"Caricamento del modello...\")\n",
|
|
" base_model = tf.keras.models.load_model(model_path, compile=False)\n",
|
|
" print(\"Modello caricato con successo!\")\n",
|
|
"\n",
|
|
" return retrain_model(\n",
|
|
" base_model=base_model,\n",
|
|
" train_data=train_data,\n",
|
|
" train_targets=train_targets,\n",
|
|
" val_data=val_data,\n",
|
|
" val_targets=val_targets,\n",
|
|
" test_data=test_data,\n",
|
|
" test_targets=test_targets,\n",
|
|
" epochs=epochs,\n",
|
|
" batch_size=batch_size\n",
|
|
" )\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"Errore durante il retraining: {str(e)}\")\n",
|
|
" raise"
|
|
],
|
|
"outputs": [],
|
|
"execution_count": null
|
|
},
|
|
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "model_path = './kaggle/working/models/oil_transformer/final_model.keras'\n",
    "\n",
    "retrained_model, retrain_history, initial_metrics, final_metrics = start_retraining(\n",
    "    model_path=model_path,\n",
    "    train_data=train_data,\n",
    "    train_targets=train_targets,\n",
    "    val_data=val_data,\n",
    "    val_targets=val_targets,\n",
    "    test_data=test_data,\n",
    "    test_targets=test_targets,\n",
    "    epochs=50,\n",
    "    batch_size=128\n",
    ")\n",
    "\n",
    "# Visualize the results\n",
    "visualize_retraining_results(retrain_history, initial_metrics, final_metrics)"
   ],
   "outputs": [],
   "execution_count": null
  },
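  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Because the architecture uses the custom `DataAugmentation`, `PositionalEncoding` and `WarmUpLearningRateSchedule` classes defined earlier, reloading a saved `.keras` file may require passing them via `custom_objects` (and the custom classes must implement `get_config` for serialization to work). The optional cell below is a minimal sketch of such a reload followed by a prediction on the validation set."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Minimal sketch: reload the retrained model and run an inference pass.\n",
    "# Assumes the custom classes defined earlier are serializable (implement get_config).\n",
    "reloaded_model = tf.keras.models.load_model(\n",
    "    './kaggle/working/models/oil_transformer/retrained_model.keras',\n",
    "    custom_objects={\n",
    "        'DataAugmentation': DataAugmentation,\n",
    "        'PositionalEncoding': PositionalEncoding,\n",
    "        'WarmUpLearningRateSchedule': WarmUpLearningRateSchedule\n",
    "    },\n",
    "    compile=False\n",
    ")\n",
    "\n",
    "val_predictions = reloaded_model.predict(val_data, verbose=0)\n",
    "print('Predictions shape:', val_predictions.shape)"
   ],
   "outputs": [],
   "execution_count": null
  },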
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "4BAI1zsJthBC"
   },
   "source": [
    "## 8. Conclusions and Next Steps\n",
    "\n",
    "In this notebook, we have:\n",
    "1. Loaded and analyzed the weather data\n",
    "2. Simulated annual olive production based on the weather data\n",
    "3. Explored the relationships between weather variables and olive production\n",
    "4. Built and evaluated a machine learning model to predict production\n",
    "5. Used ARIMA to produce weather forecasts\n",
    "6. Predicted olive production for the coming year\n",
    "\n",
    "Next steps:\n",
    "- Collect real olive production data to replace the simulated data\n",
    "- Explore more advanced models, such as neural networks or ensemble models\n",
    "- Incorporate other factors that may influence production, such as agricultural practices or the age of the trees\n",
    "- Develop an interactive dashboard based on this model"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [],
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "A100",
   "provenance": []
  },
  "kaggle": {
   "accelerator": "none",
   "dataSources": [
    {
     "datasetId": 5950719,
     "sourceId": 9725208,
     "sourceType": "datasetVersion"
    },
    {
     "datasetId": 5954901,
     "sourceId": 9730815,
     "sourceType": "datasetVersion"
    }
   ],
   "dockerImageVersionId": 30787,
   "isGpuEnabled": false,
   "isInternetEnabled": true,
   "language": "python",
   "sourceType": "notebook"
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0rc1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}