reconfigure project repo for publish

This commit is contained in:
Giuseppe Nucifora 2024-11-20 01:18:05 +01:00
parent ffc74dc262
commit d5ac423930
30 changed files with 1715 additions and 2692 deletions

View File

@ -0,0 +1,6 @@
[core]
autostage = true
remote = storage
['remote "storage"']
url = s3://olive-oil-dataset
region = eu-west-1

0
.idea/.gitignore generated vendored Normal file → Executable file
View File

2
.idea/.name generated Normal file → Executable file
View File

@ -1 +1 @@
weather_data.parquet Tesi Pegaso

0
.idea/TesiPegaso.iml generated Normal file → Executable file
View File

16
.idea/csv-editor.xml generated Normal file
View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CsvFileAttributes">
<option name="attributeMap">
<map>
<entry key="$USER_HOME$/Downloads/olive-oli-user_accessKeys.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
</map>
</option>
</component>
</project>

0
.idea/inspectionProfiles/profiles_settings.xml generated Normal file → Executable file
View File

0
.idea/misc.xml generated Normal file → Executable file
View File

0
.idea/modules.xml generated Normal file → Executable file
View File

0
.idea/vcs.xml generated Normal file → Executable file
View File

0
elaborato_tesi_1_6.pdf Normal file → Executable file
View File

1
src/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/sources

3
src/README.md Normal file → Executable file
View File

@ -1 +1,4 @@
python -m olive_oil_train_dataset.create_train_dataset --random-seed 42 --num-simulations 100000 --batch-size 10000 --max-workers 7 python -m olive_oil_train_dataset.create_train_dataset --random-seed 42 --num-simulations 100000 --batch-size 10000 --max-workers 7
python -m weather.uv_index.uv_index_model

0
src/__init__.py Normal file → Executable file
View File

0
src/__pycache__/__init__.cpython-39.pyc Normal file → Executable file
View File

View File

0
src/dashboard/environmental_simulator.py Normal file → Executable file
View File

View File

@ -315,36 +315,19 @@
"import pandas as pd\n", "import pandas as pd\n",
"import numpy as np\n", "import numpy as np\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"import seaborn as sns\n", "from sklearn.preprocessing import StandardScaler\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
"from tensorflow.keras.layers import Input, Dense, Dropout, Bidirectional, LSTM, LayerNormalization, Add, Activation, BatchNormalization, MultiHeadAttention, MaxPooling1D, Conv1D, GlobalMaxPooling1D, GlobalAveragePooling1D, \\\n",
" Concatenate, ZeroPadding1D, Lambda, AveragePooling1D, concatenate\n",
"from tensorflow.keras.layers import Dense, LSTM, Conv1D, Input, concatenate, Dropout, BatchNormalization, GlobalAveragePooling1D, Bidirectional, TimeDistributed, Attention, MultiHeadAttention\n",
"import tensorflow_addons as tfa\n", "import tensorflow_addons as tfa\n",
"from tensorflow.keras.models import Model\n",
"from tensorflow.keras.regularizers import l2\n",
"from tensorflow.keras.optimizers import Adam\n",
"from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint\n",
"from datetime import datetime\n", "from datetime import datetime\n",
"import os\n", "import os\n",
"import json\n",
"import joblib\n", "import joblib\n",
"import re\n", "import re\n",
"import pyarrow as pa\n", "from typing import List\n",
"import pyarrow.parquet as pq\n",
"from tqdm import tqdm\n",
"from concurrent.futures import ProcessPoolExecutor, as_completed\n",
"from functools import partial\n",
"import psutil\n",
"import multiprocessing\n",
"from typing import List, Dict\n",
"\n", "\n",
"random_state_value = 42\n", "random_state_value = None\n",
"execute_name = datetime.now().strftime(\"%Y-%m-%d_%H-%M\")\n", "execute_name = datetime.now().strftime(\"%Y-%m-%d_%H-%M\")\n",
"\n", "\n",
"base_project_dir = './'\n", "base_project_dir = './'\n",
"data_dir = '../sources/'\n", "data_dir = '../../sources/'\n",
"models_project_dir = base_project_dir\n", "models_project_dir = base_project_dir\n",
"\n", "\n",
"os.makedirs(base_project_dir, exist_ok=True)\n", "os.makedirs(base_project_dir, exist_ok=True)\n",
@ -823,16 +806,18 @@
"\n", "\n",
" # Split dei dati (usando indici casuali per una migliore distribuzione)\n", " # Split dei dati (usando indici casuali per una migliore distribuzione)\n",
" indices = np.random.permutation(len(X_temporal))\n", " indices = np.random.permutation(len(X_temporal))\n",
" #train_idx = int(len(indices) * 0.7)\n", "\n",
" #val_idx = int(len(indices) * 0.85)\n", " #train_idx = int(len(indices) * 0.7) # 70% training\n",
" #val_idx = int(len(indices) * 0.85) # 15% validation\n",
" # Il resto rimane 15% test\n",
"\n", "\n",
" train_idx = int(len(indices) * 0.65) # 65% training\n", " train_idx = int(len(indices) * 0.65) # 65% training\n",
" val_idx = int(len(indices) * 0.85) # 20% validation\n", " val_idx = int(len(indices) * 0.85) # 20% validation\n",
" # Il resto rimane 15% test\n", " # Il resto rimane 15% test\n",
"\n", "\n",
" # Oppure versione con 25% validation:\n",
" #train_idx = int(len(indices) * 0.60) # 60% training\n", " #train_idx = int(len(indices) * 0.60) # 60% training\n",
" #val_idx = int(len(indices) * 0.85) # 25% validation\n", " #val_idx = int(len(indices) * 0.85) # 25% validation\n",
" # Il resto rimane 15% test\n",
"\n", "\n",
" train_indices = indices[:train_idx]\n", " train_indices = indices[:train_idx]\n",
" val_indices = indices[train_idx:val_idx]\n", " val_indices = indices[train_idx:val_idx]\n",

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

0
src/olive-oil-dashboard.py Normal file → Executable file
View File

0
src/olive_config.json Normal file → Executable file
View File

0
src/olive_oil_train_dataset/create_train_dataset.py Normal file → Executable file
View File

0
src/setup.py Normal file → Executable file
View File

6
src/sources.dvc Normal file
View File

@ -0,0 +1,6 @@
outs:
- md5: 23e7daa876590e1c6ae9cb7af3be8028.dir
size: 984847509
nfiles: 5
hash: md5
path: sources

0
src/utils/__init__.py Normal file → Executable file
View File

0
src/utils/__pycache__/__init__.cpython-39.pyc Normal file → Executable file
View File

0
src/utils/__pycache__/helpers.cpython-39.pyc Normal file → Executable file
View File

8
src/utils/helpers.py Normal file → Executable file
View File

@ -429,7 +429,7 @@ def calculate_water_need(weather_data, base_need, optimal_temp):
rain_factor = 1 - 0.001 * weather_data['precip_sum'] # Diminuisce leggermente con l'aumentare delle precipitazioni rain_factor = 1 - 0.001 * weather_data['precip_sum'] # Diminuisce leggermente con l'aumentare delle precipitazioni
return base_need * temp_factor * rain_factor return base_need * temp_factor * rain_factor
def create_technique_mapping(olive_varieties, mapping_path='./kaggle/working/models/technique_mapping.joblib'): def create_technique_mapping(olive_varieties, mapping_path='./sources/technique_mapping.joblib'):
# Estrai tutte le tecniche uniche dal dataset e convertile in lowercase # Estrai tutte le tecniche uniche dal dataset e convertile in lowercase
all_techniques = olive_varieties['Tecnica di Coltivazione'].str.lower().unique() all_techniques = olive_varieties['Tecnica di Coltivazione'].str.lower().unique()
@ -443,7 +443,7 @@ def create_technique_mapping(olive_varieties, mapping_path='./kaggle/working/mod
return technique_mapping return technique_mapping
def encode_techniques(df, mapping_path='./kaggle/working/models/technique_mapping.joblib'): def encode_techniques(df, mapping_path='./sources/technique_mapping.joblib'):
if not os.path.exists(mapping_path): if not os.path.exists(mapping_path):
raise FileNotFoundError(f"Mapping not found at {mapping_path}. Run create_technique_mapping first.") raise FileNotFoundError(f"Mapping not found at {mapping_path}. Run create_technique_mapping first.")
@ -459,7 +459,7 @@ def encode_techniques(df, mapping_path='./kaggle/working/models/technique_mappin
return df return df
def decode_techniques(df, mapping_path='./kaggle/working/models/technique_mapping.joblib'): def decode_techniques(df, mapping_path='./sources/technique_mapping.joblib'):
if not os.path.exists(mapping_path): if not os.path.exists(mapping_path):
raise FileNotFoundError(f"Mapping not found at {mapping_path}") raise FileNotFoundError(f"Mapping not found at {mapping_path}")
@ -477,7 +477,7 @@ def decode_techniques(df, mapping_path='./kaggle/working/models/technique_mappin
return df return df
def decode_single_technique(technique_value, mapping_path='./kaggle/working/models/technique_mapping.joblib'): def decode_single_technique(technique_value, mapping_path='./sources/technique_mapping.joblib'):
if not os.path.exists(mapping_path): if not os.path.exists(mapping_path):
raise FileNotFoundError(f"Mapping not found at {mapping_path}") raise FileNotFoundError(f"Mapping not found at {mapping_path}")