reconfigure project repo for publish

This commit is contained in:
Giuseppe Nucifora 2024-11-20 01:18:05 +01:00
parent ffc74dc262
commit d5ac423930
30 changed files with 1715 additions and 2692 deletions

View File

@ -0,0 +1,6 @@
[core]
autostage = true
remote = storage
['remote "storage"']
url = s3://olive-oil-dataset
region = eu-west-1

0
.idea/.gitignore generated vendored Normal file → Executable file
View File

2
.idea/.name generated Normal file → Executable file
View File

@ -1 +1 @@
weather_data.parquet
Tesi Pegaso

0
.idea/TesiPegaso.iml generated Normal file → Executable file
View File

16
.idea/csv-editor.xml generated Normal file
View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CsvFileAttributes">
<option name="attributeMap">
<map>
<entry key="$USER_HOME$/Downloads/olive-oli-user_accessKeys.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
</map>
</option>
</component>
</project>

0
.idea/inspectionProfiles/profiles_settings.xml generated Normal file → Executable file
View File

0
.idea/misc.xml generated Normal file → Executable file
View File

0
.idea/modules.xml generated Normal file → Executable file
View File

0
.idea/vcs.xml generated Normal file → Executable file
View File

0
elaborato_tesi_1_6.pdf Normal file → Executable file
View File

1
src/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/sources

3
src/README.md Normal file → Executable file
View File

@ -1 +1,4 @@
python -m olive_oil_train_dataset.create_train_dataset --random-seed 42 --num-simulations 100000 --batch-size 10000 --max-workers 7
python -m weather.uv_index.uv_index_model

0
src/__init__.py Normal file → Executable file
View File

0
src/__pycache__/__init__.cpython-39.pyc Normal file → Executable file
View File

View File

0
src/dashboard/environmental_simulator.py Normal file → Executable file
View File

View File

@ -315,36 +315,19 @@
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
"from tensorflow.keras.layers import Input, Dense, Dropout, Bidirectional, LSTM, LayerNormalization, Add, Activation, BatchNormalization, MultiHeadAttention, MaxPooling1D, Conv1D, GlobalMaxPooling1D, GlobalAveragePooling1D, \\\n",
" Concatenate, ZeroPadding1D, Lambda, AveragePooling1D, concatenate\n",
"from tensorflow.keras.layers import Dense, LSTM, Conv1D, Input, concatenate, Dropout, BatchNormalization, GlobalAveragePooling1D, Bidirectional, TimeDistributed, Attention, MultiHeadAttention\n",
"from sklearn.preprocessing import StandardScaler\n",
"import tensorflow_addons as tfa\n",
"from tensorflow.keras.models import Model\n",
"from tensorflow.keras.regularizers import l2\n",
"from tensorflow.keras.optimizers import Adam\n",
"from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint\n",
"from datetime import datetime\n",
"import os\n",
"import json\n",
"import joblib\n",
"import re\n",
"import pyarrow as pa\n",
"import pyarrow.parquet as pq\n",
"from tqdm import tqdm\n",
"from concurrent.futures import ProcessPoolExecutor, as_completed\n",
"from functools import partial\n",
"import psutil\n",
"import multiprocessing\n",
"from typing import List, Dict\n",
"from typing import List\n",
"\n",
"random_state_value = 42\n",
"random_state_value = None\n",
"execute_name = datetime.now().strftime(\"%Y-%m-%d_%H-%M\")\n",
"\n",
"base_project_dir = './'\n",
"data_dir = '../sources/'\n",
"data_dir = '../../sources/'\n",
"models_project_dir = base_project_dir\n",
"\n",
"os.makedirs(base_project_dir, exist_ok=True)\n",
@ -823,16 +806,18 @@
"\n",
" # Split dei dati (usando indici casuali per una migliore distribuzione)\n",
" indices = np.random.permutation(len(X_temporal))\n",
" #train_idx = int(len(indices) * 0.7)\n",
" #val_idx = int(len(indices) * 0.85)\n",
"\n",
" #train_idx = int(len(indices) * 0.7) # 70% training\n",
" #val_idx = int(len(indices) * 0.85) # 15% validation\n",
" # Il resto rimane 15% test\n",
"\n",
" train_idx = int(len(indices) * 0.65) # 65% training\n",
" val_idx = int(len(indices) * 0.85) # 20% validation\n",
" # Il resto rimane 15% test\n",
"\n",
" # Oppure versione con 25% validation:\n",
" #train_idx = int(len(indices) * 0.60) # 60% training\n",
" #val_idx = int(len(indices) * 0.85) # 25% validation\n",
" # Il resto rimane 15% test\n",
"\n",
" train_indices = indices[:train_idx]\n",
" val_indices = indices[train_idx:val_idx]\n",

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

0
src/olive-oil-dashboard.py Normal file → Executable file
View File

0
src/olive_config.json Normal file → Executable file
View File

0
src/olive_oil_train_dataset/create_train_dataset.py Normal file → Executable file
View File

0
src/setup.py Normal file → Executable file
View File

6
src/sources.dvc Normal file
View File

@ -0,0 +1,6 @@
outs:
- md5: 23e7daa876590e1c6ae9cb7af3be8028.dir
size: 984847509
nfiles: 5
hash: md5
path: sources

0
src/utils/__init__.py Normal file → Executable file
View File

0
src/utils/__pycache__/__init__.cpython-39.pyc Normal file → Executable file
View File

0
src/utils/__pycache__/helpers.cpython-39.pyc Normal file → Executable file
View File

8
src/utils/helpers.py Normal file → Executable file
View File

@ -429,7 +429,7 @@ def calculate_water_need(weather_data, base_need, optimal_temp):
rain_factor = 1 - 0.001 * weather_data['precip_sum'] # Diminuisce leggermente con l'aumentare delle precipitazioni
return base_need * temp_factor * rain_factor
def create_technique_mapping(olive_varieties, mapping_path='./kaggle/working/models/technique_mapping.joblib'):
def create_technique_mapping(olive_varieties, mapping_path='./sources/technique_mapping.joblib'):
# Estrai tutte le tecniche uniche dal dataset e convertile in lowercase
all_techniques = olive_varieties['Tecnica di Coltivazione'].str.lower().unique()
@ -443,7 +443,7 @@ def create_technique_mapping(olive_varieties, mapping_path='./kaggle/working/mod
return technique_mapping
def encode_techniques(df, mapping_path='./kaggle/working/models/technique_mapping.joblib'):
def encode_techniques(df, mapping_path='./sources/technique_mapping.joblib'):
if not os.path.exists(mapping_path):
raise FileNotFoundError(f"Mapping not found at {mapping_path}. Run create_technique_mapping first.")
@ -459,7 +459,7 @@ def encode_techniques(df, mapping_path='./kaggle/working/models/technique_mappin
return df
def decode_techniques(df, mapping_path='./kaggle/working/models/technique_mapping.joblib'):
def decode_techniques(df, mapping_path='./sources/technique_mapping.joblib'):
if not os.path.exists(mapping_path):
raise FileNotFoundError(f"Mapping not found at {mapping_path}")
@ -477,7 +477,7 @@ def decode_techniques(df, mapping_path='./kaggle/working/models/technique_mappin
return df
def decode_single_technique(technique_value, mapping_path='./kaggle/working/models/technique_mapping.joblib'):
def decode_single_technique(technique_value, mapping_path='./sources/technique_mapping.joblib'):
if not os.path.exists(mapping_path):
raise FileNotFoundError(f"Mapping not found at {mapping_path}")