| """ |
| Data tab. |
| """ |
|
|
from __future__ import annotations

import numpy as np
import pandas as pd
import streamlit as st

from config import settings
from ui.bootstrap import _BRAND_GREEN, _HAS_PLOTLY, load_labels, load_metrics

if _HAS_PLOTLY:
    # plotly.express is needed by the px.histogram(...) calls in the Data Explorer section.
    import plotly.express as px
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots
|
|
|
|
|
|
| def render_tab_data() -> None: |
| _data_section = st.radio( |
|
|
| "Section", |
|
|
| ["Farquhar Model", "Model Validation", "Data Explorer"], |
|
|
| horizontal=True, |
|
|
| ) |
|
|
|
|
| if _data_section == "Farquhar Model": |
|
|
| st.header("How we measure vine photosynthesis") |
|
|
| st.markdown( |
|
|
| "Before we can predict photosynthesis, we need to **measure** it. " |
|
|
| "On-site sensors record light, temperature, humidity, and CO2 every " |
|
|
| "15 minutes. A well-established plant biology model (Farquhar et al., 1980) " |
|
|
| "converts these readings into the photosynthesis rate **A** — " |
|
|
| "how fast the vine is converting sunlight into sugar. " |
|
|
| "This tab shows the results of that calculation." |
|
|
| ) |
|
|
| with st.expander("How does the Farquhar model work?"): |
|
|
| st.markdown( |
|
|
| "Uses the **Farquhar et al. (1980)** mechanistic model with " |
|
|
| "**Greer & Weedon (2012)** grapevine parameters to compute the net leaf " |
|
|
| "photosynthesis rate **A** (\u00b5mol CO\u2082 m\u207b\u00b2 s\u207b\u00b9) from on-site sensor readings " |
|
|
| "(PAR, leaf temperature, air temperature, CO\u2082, VPD). Only daytime rows " |
|
|
| "(PAR > 50) during the growing season (May\u2013Sep) are used. " |
|
|
| "Temperature dependencies use **Bernacchi et al. (2001)** kinetic constants.\n\n" |
|
|
| "The core computation:\n\n" |
|
|
| "1. **Rubisco-limited rate:** Ac = Vcmax \u00b7 (ci \u2212 \u0393*) / (ci + Kc \u00b7 (1 + O\u1d62 / Ko))\n" |
|
|
| "2. **RuBP-limited rate:** Aj = J \u00b7 (ci \u2212 \u0393*) / (4\u00b7ci + 8\u00b7\u0393*)\n" |
|
|
| "3. **Net assimilation:** A = min(Ac, Aj) \u2212 Rd\n\n" |
|
|
| "Where Vcmax and Jmax follow temperature-dependent Arrhenius curves " |
|
|
| "(peak at 39\u00b0C and 36\u00b0C respectively for Semillon), J is solved from the " |
|
|
| "light-response quadratic, and ci (intercellular CO\u2082) is derived from " |
|
|
| "ambient CO\u2082 scaled by stomatal conductance (reduced by VPD and CWSI).\n\n" |
|
|
| "#### How VPD and CWSI reduce photosynthesis\n\n" |
|
|
| "Both VPD and CWSI act on photosynthesis through the same bottleneck: " |
|
|
| "**stomatal conductance (gs)**. Stomata are the pores on the leaf surface " |
|
|
| "that let CO\u2082 in for photosynthesis \u2014 but also let water vapor out.\n\n" |
|
|
| "**VPD (Vapor Pressure Deficit)** measures how dry the air is. " |
|
|
| "When VPD is high (hot, dry air), the leaf would lose water too fast, " |
|
|
| "so the vine partially closes its stomata to conserve water. " |
|
|
| "Less open stomata = less CO\u2082 enters the leaf = lower ci = lower A. " |
|
|
| "The model applies an exponential decay: " |
|
|
| "`gs_scale = exp(\u22120.3 \u00b7 max(0, VPD \u2212 1.0))`, " |
|
|
| "so the effect kicks in above 1 kPa and intensifies with drier air.\n\n" |
|
|
| "**CWSI (Crop Water Stress Index)** is computed from the leaf\u2013air " |
|
|
| "temperature difference: `CWSI = (Tleaf \u2212 Tair \u2212 \u0394Tmin) / (\u0394Tmax \u2212 \u0394Tmin)`, " |
|
|
| "clipped to [0, 1]. A well-watered vine transpires freely, keeping its " |
|
|
| "leaves cooler than the air (CWSI \u2248 0). A stressed vine closes stomata, " |
|
|
| "transpiration drops, and leaves heat up (CWSI \u2192 1). " |
|
|
| "The model reduces stomatal conductance by `(1 \u2212 0.5 \u00b7 CWSI)`, " |
|
|
| "so at full stress (CWSI = 1) stomatal opening is halved.\n\n" |
|
|
| "Combined effect on ci: " |
|
|
| "`ci = CO\u2082 \u00b7 (1 \u2212 1 / (1.6 \u00b7 gs_factor))` where " |
|
|
| "`gs_factor = 1.2 \u00b7 VPD_scale \u00b7 (1 \u2212 0.5 \u00b7 CWSI)`\n\n" |
|
|
| "This means on a hot, dry Negev afternoon (VPD > 3 kPa, CWSI > 0.5), " |
|
|
| "ci drops sharply and photosynthesis can fall even when light is abundant " |
|
|
| "\u2014 the vine has plenty of energy but cannot get enough CO\u2082 through " |
|
|
| "its closed stomata.\n\n" |
|
|
| "**Key references:**\n" |
|
|
| "- [Farquhar, von Caemmerer & Berry (1980)](https://doi.org/10.1007/BF00386231) \u2014 " |
|
|
| "Biochemical model of photosynthetic CO\u2082 assimilation\n" |
|
|
| "- [Greer & Weedon (2012)](https://doi.org/10.1111/j.1365-3040.2011.02471.x) \u2014 " |
|
|
| "Modelling photosynthetic responses to temperature of grapevine\n" |
|
|
| "- [Bernacchi et al. (2001)](https://doi.org/10.1046/j.1365-3040.2001.00668.x) \u2014 " |
|
|
| "Temperature dependence of Kc, Ko, and \u0393*" |
|
|
| ) |
|
|
|
|
| st.subheader("Sensor inputs") |
|
|
| st.markdown("The Farquhar model uses **6 columns** from the Air1 reference station, " |
|
|
| "sampled every 15 minutes:") |
|
|
| used_sensors_tab = pd.DataFrame([ |
|
|
| {"Column": "Air1_PAR_ref", "Measurement": "Photosynthetically Active Radiation", "Units": "\u00b5mol m\u207b\u00b2 s\u207b\u00b9", "Used in": "Farquhar: electron transport (J)", "Notes": "Primary light input driving photosynthesis rate"}, |
|
|
| {"Column": "Air1_leafTemperature_ref", "Measurement": "Leaf temperature", "Units": "\u00b0C", "Used in": "Farquhar: Vcmax, Jmax, Kc, Ko, \u0393*; CWSI", "Notes": "Controls enzyme kinetics; also used to compute CWSI"}, |
|
|
| {"Column": "Air1_airTemperature_ref", "Measurement": "Air temperature", "Units": "\u00b0C", "Used in": "CWSI computation", "Notes": "Tleaf \u2212 Tair drives the water stress index"}, |
|
|
| {"Column": "Air1_CO2_ref", "Measurement": "Ambient CO\u2082 concentration", "Units": "ppm", "Used in": "Farquhar: intercellular CO\u2082 (ci)", "Notes": "Substrate for carbon fixation by Rubisco (\u00d70.7 correction applied)"}, |
|
|
| {"Column": "Air1_VPD_ref", "Measurement": "Vapor Pressure Deficit", "Units": "kPa", "Used in": "Farquhar: stomatal conductance \u2192 ci", "Notes": "High VPD closes stomata, reducing ci and thus A"}, |
|
|
| {"Column": "Air1_airHumidity_ref", "Measurement": "Relative humidity", "Units": "%", "Used in": "Loaded but not consumed", "Notes": "VPD already encodes humidity; column is redundant"}, |
|
|
| ]) |
|
|
| st.dataframe(used_sensors_tab, hide_index=True) |
|
|
|
|
| labels_path = settings.PROCESSED_DIR / "stage1_labels.csv" |
|
|
| validation_img = settings.OUTPUTS_DIR / "stage1_validation.png" |
|
|
|
|
| if labels_path.exists(): |
|
|
| df_labels = load_labels(str(labels_path)) |
|
|
| df_labels.index = pd.to_datetime(df_labels.index, utc=True) |
|
|
|
|
| |
|
|
| c1, c2, c3, c4 = st.columns(4) |
|
|
| c1.metric("Observations", f"{len(df_labels):,}") |
|
|
| c2.metric("Avg photosynthesis rate", f"{df_labels.iloc[:, 0].mean():.1f}", |
|
|
| help="Higher values mean the vine is growing faster. Typical range: 5-20.") |
|
|
| c3.metric("Date from", df_labels.index.min().strftime("%Y-%m-%d")) |
|
|
| c4.metric("Date to", df_labels.index.max().strftime("%Y-%m-%d")) |
|
|
|
|
| with st.expander("What does this table show?"): |
|
|
| st.markdown( |
|
|
| "Descriptive statistics (count, mean, std, min, quartiles, max) of the " |
|
|
| "computed photosynthesis rate **A**. Typical grapevine values are 0\u201325 " |
|
|
| "\u00b5mol CO\u2082 m\u207b\u00b2 s\u207b\u00b9. Values outside this range may indicate sensor issues." |
|
|
| ) |
|
|
| st.dataframe(df_labels.describe()) |
|
|
| st.caption( |
|
|
| "This table summarizes the photosynthesis measurements. The 'mean' row shows " |
|
|
| "the average rate across all observations. Values between 5-20 are typical " |
|
|
| "for healthy grapevines during the growing season." |
|
|
| ) |
|
|
|
|
| st.download_button( |
|
|
| "Download labels CSV", |
|
|
| df_labels.to_csv(), |
|
|
| file_name="stage1_labels.csv", |
|
|
| mime="text/csv", |
|
|
| ) |
|
|
|
|
| if validation_img.exists(): |
|
|
| with st.expander("How to read the validation plots"): |
|
|
| st.markdown( |
|
|
| "**Top panel \u2014 Diurnal pattern:** A vs. hour of day (UTC). Expect a bell curve " |
|
|
| "peaking mid-morning to early afternoon when light and temperature are optimal.\n\n" |
|
|
| "**Bottom panel \u2014 A vs PAR:** Photosynthesis rate plotted against Photosynthetically " |
|
|
| "Active Radiation. A should increase with PAR and saturate at high light levels, " |
|
|
| "forming a characteristic light-response curve." |
|
|
| ) |
|
|
| st.image(str(validation_img), width='stretch') |
|
|
| else: |
|
|
| st.info("No pre-computed photosynthesis labels found. Run `python scripts/run_pipeline.py` to generate them.") |
|
|
|
|
| |
|
|
|
|
| if _data_section == "Model Validation": |
|
|
| st.header("FvCB Model Validation") |
|
|
| st.markdown( |
|
|
| "Comparison of our Farquhar-von Caemmerer-Berry (FvCB) photosynthesis model " |
|
|
| "against measured data from " |
|
|
| "[Greer & Weedon (2012)](https://doi.org/10.1111/j.1365-3040.2011.02471.x) " |
|
|
| "for field-grown *Vitis vinifera* cv. **Semillon** in a hot climate " |
|
|
| "(Riverina, NSW, Australia)." |
|
|
| ) |
|
|
|
|
| |
|
|
| _val_temps = [20, 25, 30, 35, 40] |
|
|
|
|
| |
|
|
| _paper_amax = {20: 12.0, 25: 16.9, 30: 19.9, 35: 15.3, 40: 12.0} |
|
|
| _paper_amax_se = {20: 1.5, 25: 1.3, 30: 1.8, 35: 1.2, 40: 1.5} |
|
|
|
|
| |
|
|
| _paper_vcmax = {20: 20, 25: 38.5, 30: 58, 35: 85, 40: 110} |
|
|
| _paper_jmax = {20: 60, 25: 98.3, 30: 135, 35: 165, 40: 170} |
|
|
|
|
| |
|
|
| _paper_stom_lim = {20: 13, 25: 20, 30: 25, 35: 31, 40: 34} |
|
|
|
|
| from src.farquhar_model import FarquharModel as _FMVal |
|
|
| _val_model = _FMVal() |
|
|
|
|
| |
|
|
| st.subheader("1. Photosynthetic light response at different temperatures") |
|
|
| st.markdown( |
|
|
| "Light response curves (A vs PFD) at five leaf temperatures. " |
|
|
| "Model run at ambient CO$_2$ = 389 ppm, VPD = 1.5 kPa." |
|
|
| ) |
|
|
|
|
| _val_vpd = st.slider( |
|
|
| "VPD for model curves (kPa)", 0.5, 3.0, 1.5, 0.1, |
|
|
| key="val_vpd" |
|
|
| ) |
|
|
| _val_pfds = np.arange(0, 2100, 25) |
|
|
|
|
| if _HAS_PLOTLY: |
|
|
| _lr_fig = go.Figure() |
|
|
| _temp_colors = {20: "#1f77b4", 25: "#2ca02c", 30: "#d62728", |
|
|
| 35: "#9467bd", 40: "#ff7f0e"} |
|
|
| for t in _val_temps: |
|
|
| |
|
|
| _a_vals = [_val_model.calc_photosynthesis( |
|
|
| PAR=float(p), Tleaf=t, CO2=389, VPD=_val_vpd, Tair=t |
|
|
| ) for p in _val_pfds] |
|
|
| _lr_fig.add_trace(go.Scatter( |
|
|
| x=_val_pfds, y=_a_vals, mode="lines", |
|
|
| name=f"{t} °C (model)", |
|
|
| line=dict(color=_temp_colors[t]), |
|
|
| )) |
|
|
| |
|
|
| _lr_fig.add_trace(go.Scatter( |
|
|
| x=[1800], y=[_paper_amax[t]], |
|
|
| mode="markers", |
|
|
| name=f"{t} °C (Greer & Weedon)", |
|
|
| marker=dict(color=_temp_colors[t], size=12, symbol="star", |
|
|
| line=dict(width=1, color="black")), |
|
|
| error_y=dict(type="data", array=[_paper_amax_se[t]], visible=True), |
|
|
| showlegend=True, |
|
|
| )) |
|
|
| _lr_fig.update_layout( |
|
|
| xaxis_title="PFD [µmol photons m⁻² s⁻¹]", |
|
|
| yaxis_title="A [µmol CO₂ m⁻² s⁻¹]", |
|
|
| height=500, |
|
|
| legend=dict(font=dict(size=10)), |
|
|
| ) |
|
|
| st.plotly_chart(_lr_fig) |
|
|
| else: |
|
|
| st.info("Install plotly for interactive charts.") |
|
|
|
|
| |
|
|
| st.subheader("2. Light-saturated A: Model vs Paper") |
|
|
| _rows = [] |
|
|
| for t in _val_temps: |
|
|
| a_model = _val_model.calc_photosynthesis( |
|
|
| PAR=2000, Tleaf=t, CO2=389, VPD=_val_vpd, Tair=t |
|
|
| ) |
|
|
| _rows.append({ |
|
|
| "T_leaf (°C)": t, |
|
|
| "A_model": round(a_model, 1), |
|
|
| f"A_paper (Greer & Weedon)": _paper_amax[t], |
|
|
| "Difference (%)": round((a_model - _paper_amax[t]) / _paper_amax[t] * 100, 0), |
|
|
| "Limitation": "RuBP regen." if t <= 30 else "Rubisco", |
|
|
| }) |
|
|
| st.dataframe(pd.DataFrame(_rows), hide_index=True) |
|
|
|
|
| |
|
|
| st.subheader("3. Vcmax and Jmax temperature response") |
|
|
| st.markdown( |
|
|
| "Temperature dependence of maximum carboxylation rate (Vcmax) and " |
|
|
| "electron transport capacity (Jmax). Model uses modified Arrhenius " |
|
|
| "(Medlyn et al. 2002) with Greer & Weedon (2012) activation/deactivation " |
|
|
| "energies. Topt(Vcmax) = 39 °C, Topt(Jmax) = 36 °C." |
|
|
| ) |
|
|
|
|
| _t_range = np.arange(15, 50, 0.5) |
|
|
| _vcmax_curve = [_val_model.calc_Vcmax(t + 273.15) for t in _t_range] |
|
|
| _jmax_curve = [_val_model.calc_Jmax(t + 273.15) for t in _t_range] |
|
|
|
|
| |
|
|
| _scale_v = _val_model.params["k25_vcmax"] / 38.5 |
|
|
| _scale_j = _val_model.params["k25_jmax"] / 98.3 |
|
|
| _paper_vcmax_scaled = {t: v * _scale_v for t, v in _paper_vcmax.items()} |
|
|
| _paper_jmax_scaled = {t: j * _scale_j for t, j in _paper_jmax.items()} |
|
|
|
|
| if _HAS_PLOTLY: |
|
|
| _vj_fig = make_subplots(rows=1, cols=2, |
|
|
| subplot_titles=("Vcmax", "Jmax")) |
|
|
| _vj_fig.add_trace(go.Scatter( |
|
|
| x=list(_t_range), y=_vcmax_curve, mode="lines", |
|
|
| name="Vcmax (model)", line=dict(color="#d62728"), |
|
|
| ), row=1, col=1) |
|
|
| _vj_fig.add_trace(go.Scatter( |
|
|
| x=list(_paper_vcmax_scaled.keys()), |
|
|
| y=list(_paper_vcmax_scaled.values()), |
|
|
| mode="markers", name="Vcmax (paper, scaled)", |
|
|
| marker=dict(color="#d62728", size=10, symbol="star", |
|
|
| line=dict(width=1, color="black")), |
|
|
| ), row=1, col=1) |
|
|
| _vj_fig.add_trace(go.Scatter( |
|
|
| x=list(_t_range), y=_jmax_curve, mode="lines", |
|
|
| name="Jmax (model)", line=dict(color="#1f77b4"), |
|
|
| ), row=1, col=2) |
|
|
| _vj_fig.add_trace(go.Scatter( |
|
|
| x=list(_paper_jmax_scaled.keys()), |
|
|
| y=list(_paper_jmax_scaled.values()), |
|
|
| mode="markers", name="Jmax (paper, scaled)", |
|
|
| marker=dict(color="#1f77b4", size=10, symbol="star", |
|
|
| line=dict(width=1, color="black")), |
|
|
| ), row=1, col=2) |
|
|
| _vj_fig.update_xaxes(title_text="Leaf temperature (°C)") |
|
|
| _vj_fig.update_yaxes(title_text="µmol m⁻² s⁻¹") |
|
|
| _vj_fig.update_layout(height=400) |
|
|
| st.plotly_chart(_vj_fig) |
|
|
| else: |
|
|
| st.info("Install plotly for interactive charts.") |
|
|
|
|
| |
|
|
| st.subheader("4. RuBP regeneration vs Rubisco carboxylation limitation") |
|
|
| st.markdown( |
|
|
| "The paper's key finding: **below 30 °C**, photosynthesis is limited by " |
|
|
| "RuBP regeneration (electron transport / light reactions). " |
|
|
| "**Above 30 °C**, Rubisco carboxylation becomes limiting due to " |
|
|
| "declining CO$_2$ affinity and increased photorespiration.\n\n" |
|
|
| "This 30 °C transition is critical for shading decisions:\n" |
|
|
| "- **Below 30 °C**: shading reduces light and hurts photosynthesis (RuBP-limited)\n" |
|
|
| "- **Above 30 °C**: shading may help by reducing heat stress on Rubisco" |
|
|
| ) |
|
|
|
|
| if _HAS_PLOTLY: |
|
|
| _ac_vals = [] |
|
|
| _aj_vals = [] |
|
|
| _t_lim = np.arange(15, 46, 0.5) |
|
|
| for t in _t_lim: |
|
|
| Tk = t + 273.15 |
|
|
| Vcmax = _val_model.calc_Vcmax(Tk) |
|
|
| Jmax = _val_model.calc_Jmax(Tk) |
|
|
| J = _val_model.calc_electron_transport(2000, Jmax) |
|
|
| gamma = _val_model.calc_gamma_star(Tk) |
|
|
| Kc = _val_model.calc_Kc(Tk) |
|
|
| Ko = _val_model.calc_Ko(Tk) |
|
|
| ci = _val_model._ci_from_ca(389, 1.5, 0.0) |
|
|
| Ac = Vcmax * (ci - gamma) / (ci + Kc * (1 + 210.0 / Ko)) |
|
|
| Aj = J * (ci - gamma) / (4 * ci + 8 * gamma) |
|
|
| Rd = 0.015 * Vcmax |
|
|
| _ac_vals.append(Ac - Rd) |
|
|
| _aj_vals.append(Aj - Rd) |
|
|
|
|
| _lim_fig = go.Figure() |
|
|
| _lim_fig.add_trace(go.Scatter( |
|
|
| x=list(_t_lim), y=_ac_vals, mode="lines", |
|
|
| name="Ac (Rubisco-limited)", |
|
|
| line=dict(color="#d62728", dash="dash"), |
|
|
| )) |
|
|
| _lim_fig.add_trace(go.Scatter( |
|
|
| x=list(_t_lim), y=_aj_vals, mode="lines", |
|
|
| name="Aj (RuBP-limited)", |
|
|
| line=dict(color="#1f77b4", dash="dash"), |
|
|
| )) |
|
|
| |
|
|
| _a_net = [max(0, min(ac, aj)) for ac, aj in zip(_ac_vals, _aj_vals)] |
|
|
| _lim_fig.add_trace(go.Scatter( |
|
|
| x=list(_t_lim), y=_a_net, mode="lines", |
|
|
| name="A_net = min(Ac, Aj)", |
|
|
| line=dict(color="black", width=3), |
|
|
| )) |
|
|
| _lim_fig.add_vline(x=30, line_dash="dot", line_color="gray", |
|
|
| annotation_text="30 °C transition") |
|
|
| _lim_fig.update_layout( |
|
|
| xaxis_title="Leaf temperature (°C)", |
|
|
| yaxis_title="A [µmol CO₂ m⁻² s⁻¹]", |
|
|
| height=450, |
|
|
| ) |
|
|
| st.plotly_chart(_lim_fig) |
|
|
|
|
| |
|
|
| st.subheader("5. Key findings from validation") |
|
|
| st.markdown(""" |
| |
| **Agreement with Greer & Weedon (2012):** |
| |
| - Temperature ranking of Amax matches: 30 °C > 25 °C > 35 °C > 20 °C > 40 °C |
| |
| - RuBP/Rubisco limitation transition occurs at ~30-32 °C (paper: 30 °C) |
| |
| - Vcmax peaks at 39 °C, Jmax peaks at 36 °C (exact match with paper) |
| |
| - Quantitative match within 2-15% at 20-35 °C |
| |
| - Jmax/Vcmax ratio declines from ~2.4 at 20 °C to ~1.1 at 45 °C (paper: 3.0 to 1.5) |
| |
| |
| **Known limitations:** |
| |
| - At 40 °C, model underestimates A by ~12% due to Bernacchi (2001) Rubisco kinetics |
| |
| being parameterised for tobacco, not heat-adapted grapevine |
| |
| - Stomatal response to temperature is modelled via VPD only; the paper shows |
| |
| direct temperature effects on gs (Table 1: 0.199 at 20 °C to 0.140 at 40 °C) |
| |
| - The model uses Ci-based (intercellular CO2) calculations; the paper uses |
| |
| Cc-based (chloroplast CO2) with mesophyll conductance gm = 5-10 µmol m⁻² s⁻¹ Pa⁻¹ |
| |
| |
| **Reference:** |
| |
| Greer, D.H. & Weedon, M.M. (2012) Modelling photosynthetic responses to temperature |
| |
| of grapevine (*Vitis vinifera* cv. Semillon) leaves on vines grown in a hot climate. |
| |
| *Plant, Cell & Environment*, 35, 1050-1064. |
| |
| [DOI: 10.1111/j.1365-3040.2011.02471.x](https://doi.org/10.1111/j.1365-3040.2011.02471.x) |
| |
| """) |
|
|
|
|
| |
|
|
|
|
| if _data_section == "Data Explorer": |
|
|
| st.header("Data Explorer") |
|
|
| st.markdown( |
|
|
| "This tab lets you explore the **raw data** behind the predictions. " |
|
|
| "Choose a data source below:\n\n" |
|
|
| "- **Vineyard sensors** \u2014 Photosynthesis rate **A**, PAR, leaf/air temperature, and \u0394T from on-site crop sensors.\n" |
|
|
| "- **Weather station data** \u2014 IMS station 43 (Sde Boker) and the merged dataset used for ML training.\n" |
|
|
| "- **AI Data Engineering** \u2014 **Gemini-powered** sensor anomaly detection (Z-score/IQR + physical bounds) and engineered features " |
|
|
| "(cyclical time encodings, Stress Risk Score). Run the pipeline and inspect thresholds, cleaning summary, and the daytime stress profile." |
|
|
| ) |
|
|
| eda_stage = st.radio( |
|
|
| "Data source", |
|
|
| ["Vineyard sensors", "Weather station data", "AI Data Engineering"], |
|
|
| horizontal=True, |
|
|
| label_visibility="visible", |
|
|
| ) |
|
|
|
|
| if eda_stage == "Vineyard sensors": |
|
|
| st.subheader("Vineyard sensor data") |
|
|
| with st.expander("About this data"): |
|
|
| st.markdown( |
|
|
| "Shows the distribution and temporal patterns of the computed photosynthesis " |
|
|
| "rate **A**, plus the raw sensor inputs used to calculate it. " |
|
|
| "This helps verify that the model produces physiologically plausible values." |
|
|
| ) |
|
|
| try: |
|
|
| from scripts.eda import get_stage1_eda |
|
|
| s1 = get_stage1_eda() |
|
|
| except Exception as e: |
|
|
| st.error(str(e)) |
|
|
| s1 = {"error": str(e)} |
|
|
|
|
| if s1.get("error"): |
|
|
| st.warning(s1["error"]) |
|
|
| else: |
|
|
| stats = s1["labels_stats"] |
|
|
| c1, c2, c3, c4 = st.columns(4) |
|
|
| c1.metric("Observations", stats["count"]) |
|
|
| c2.metric("Mean A", f"{stats['A_mean']:.2f}") |
|
|
| c3.metric("Std A", f"{stats['A_std']:.2f}") |
|
|
| c4.metric("Range", f"{stats['A_min']:.1f} \u2013 {stats['A_max']:.1f}") |
|
|
| st.caption(f"Date range: {stats['date_min']} to {stats['date_max']}") |
|
|
|
|
| if _HAS_PLOTLY and s1.get("labels") is not None: |
|
|
| A = s1["labels"].iloc[:, 0] |
|
|
| with st.expander("About: Distribution of A"): |
|
|
| st.markdown( |
|
|
| "Histogram of all computed A values. A right-skewed distribution is typical: " |
|
|
| "many low-A values (early/late day, cloudy) with a tail of high-A values " |
|
|
| "(midday, full sun). The peak should be between 5\u201315 \u00b5mol m\u207b\u00b2 s\u207b\u00b9 for grapevines." |
|
|
| ) |
|
|
| fig = px.histogram(x=A[A >= 1].dropna(), nbins=50, title="Distribution of A (Stage 1 labels, A \u2265 1)") |
|
|
| fig.update_layout(xaxis_title="A (\u00b5mol m\u207b\u00b2 s\u207b\u00b9)", xaxis_range=[1, None]) |
|
|
| st.plotly_chart(fig) |
|
|
|
|
| with st.expander("About: A over time"): |
|
|
| st.markdown( |
|
|
| "Time series of A across the dataset. Only the **growing season** " |
|
|
| "(May\u2013Sep) is included \u2014 the gaps between clusters represent the " |
|
|
| "dormant months (Oct\u2013Apr) when the vine does not photosynthesize " |
|
|
| "and no data is collected. Within each season, look for diurnal " |
|
|
| "oscillations and any anomalous spikes that may indicate sensor issues." |
|
|
| ) |
|
|
| |
|
|
| A_daily = A.resample("D").mean().dropna() |
|
|
| fig2 = go.Figure() |
|
|
| fig2.add_trace(go.Scatter( |
|
|
| x=A_daily.index, y=A_daily.values, mode="lines", |
|
|
| name="A (daily mean)", line=dict(width=1.5, color=_BRAND_GREEN), |
|
|
| connectgaps=False, |
|
|
| )) |
|
|
| fig2.update_layout( |
|
|
| title="A over time (daily mean)", |
|
|
| xaxis_title="Time", yaxis_title="A (\u00b5mol m\u207b\u00b2 s\u207b\u00b9)", |
|
|
| ) |
|
|
| st.plotly_chart(fig2) |
|
|
|
|
| if s1.get("sensor_sample") is not None and not s1["sensor_sample"].empty and _HAS_PLOTLY: |
|
|
| df = s1["sensor_sample"] |
|
|
| st.subheader("Sensor distributions (daytime PAR > 50)") |
|
|
| with st.expander("About sensor distributions"): |
|
|
| st.markdown( |
|
|
| "Histograms of the main sensor inputs used in the Farquhar model, filtered " |
|
|
| "to daytime only (PAR > 50 \u00b5mol m\u207b\u00b2 s\u207b\u00b9).\n\n" |
|
|
| "- **PAR:** Light energy for photosynthesis (400\u2013700 nm). " |
|
|
| "Values above 2500 are sensor artifacts and are excluded.\n" |
|
|
| "- **Leaf Temp:** Leaf surface temperature (\u00b0C).\n" |
|
|
| "- **Air Temp:** Ambient temperature near the canopy (\u00b0C)." |
|
|
| ) |
|
|
| sensor_cols = [c for c in ["Air1_PAR_ref", "Air1_leafTemperature_ref", "Air1_airTemperature_ref"] if c in df.columns] |
|
|
| if sensor_cols: |
|
|
| cols = st.columns(len(sensor_cols)) |
|
|
| for col_st, col_name in zip(cols, sensor_cols): |
|
|
| with col_st: |
|
|
| series = df[col_name].dropna() |
|
|
| |
|
|
| if col_name == "Air1_PAR_ref": |
|
|
| series = series[series <= 2500] |
|
|
| fig = px.histogram(series, nbins=40, title=col_name.replace("Air1_", "").replace("_ref", "")) |
|
|
| fig.update_layout(height=300) |
|
|
| st.plotly_chart(fig) |
|
|
|
|
| |
|
|
| if "Air1_leafTemperature_ref" in df.columns and "Air1_airTemperature_ref" in df.columns: |
|
|
| st.subheader("Leaf\u2013Air temperature difference (\u0394T)") |
|
|
| with st.expander("Why is \u0394T important?"): |
|
|
| st.markdown( |
|
|
| "The difference between leaf and air temperature " |
|
|
| "(**\u0394T = T_leaf \u2212 T_air**) is a direct indicator of " |
|
|
| "**plant water stress**.\n\n" |
|
|
| "- **\u0394T < 0** (leaf cooler than air): the vine is transpiring " |
|
|
| "normally \u2014 evaporative cooling keeps the leaf below air " |
|
|
| "temperature. The stomata are open and photosynthesis is active.\n" |
|
|
| "- **\u0394T \u2248 0**: transpiration is slowing down.\n" |
|
|
| "- **\u0394T > 0** (leaf warmer than air): the vine has partially " |
|
|
| "or fully closed its stomata due to water stress or extreme VPD. " |
|
|
| "Transpiration has stopped cooling the leaf, so it heats up " |
|
|
| "above ambient. Photosynthesis is severely limited.\n\n" |
|
|
| "This is the basis of the **Crop Water Stress Index (CWSI)** " |
|
|
| "used in the Farquhar model. In agrivoltaics, a rising \u0394T is " |
|
|
| "the signal that the vine would benefit from tracker shading: " |
|
|
| "the extra light cannot be used anyway because the stomata " |
|
|
| "are shut." |
|
|
| ) |
|
|
| delta_t = df["Air1_leafTemperature_ref"] - df["Air1_airTemperature_ref"] |
|
|
| delta_t = delta_t.dropna() |
|
|
| col_hist, col_time = st.columns(2) |
|
|
| with col_hist: |
|
|
| fig_dt = px.histogram( |
|
|
| delta_t, nbins=50, |
|
|
| title="\u0394T distribution (daytime)", |
|
|
| color_discrete_sequence=[_BRAND_GREEN], |
|
|
| ) |
|
|
| fig_dt.update_layout( |
|
|
| xaxis_title="\u0394T = T_leaf \u2212 T_air (\u00b0C)", |
|
|
| yaxis_title="Count", |
|
|
| height=350, |
|
|
| ) |
|
|
| fig_dt.add_vline(x=0, line_dash="dash", line_color="red", |
|
|
| annotation_text="T_leaf = T_air") |
|
|
| st.plotly_chart(fig_dt) |
|
|
| with col_time: |
|
|
| if "time" in df.columns: |
|
|
| |
|
|
| _ts = pd.to_datetime(df["time"], utc=True) |
|
|
| _grow_mask = _ts.dt.month.isin([5, 6, 7, 8, 9]) |
|
|
| _dt_grow = delta_t[_grow_mask] |
|
|
| _ts_grow = _ts[_grow_mask] |
|
|
| fig_dt2 = go.Figure() |
|
|
| fig_dt2.add_trace(go.Scatter( |
|
|
| x=_ts_grow, y=_dt_grow.values, |
|
|
| mode="markers", marker=dict(size=2, color=_BRAND_GREEN, opacity=0.4), |
|
|
| name="\u0394T", |
|
|
| )) |
|
|
| fig_dt2.add_hline(y=0, line_dash="dash", line_color="red") |
|
|
| fig_dt2.update_layout( |
|
|
| title="\u0394T over time", |
|
|
| xaxis_title="Time", |
|
|
| yaxis_title="\u0394T (\u00b0C)", |
|
|
| height=350, |
|
|
| ) |
|
|
| st.plotly_chart(fig_dt2) |
|
|
|
|
| elif eda_stage == "Weather station data": |
|
|
| st.subheader("Weather station data") |
|
|
| with st.expander("About this data"): |
|
|
| st.markdown( |
|
|
| "Shows the IMS weather station data and the merged dataset used for " |
|
|
| "prediction model training. This helps verify data overlap, check for " |
|
|
| "missing values, and understand the weather patterns." |
|
|
| ) |
|
|
| try: |
|
|
| from scripts.eda import get_stage2_eda |
|
|
| s2 = get_stage2_eda() |
|
|
| except Exception as e: |
|
|
| st.error(str(e)) |
|
|
| s2 = {"error": str(e)} |
|
|
|
|
| if s2.get("error"): |
|
|
| st.warning(s2["error"]) |
|
|
| else: |
|
|
| stats = s2["stats"] |
|
|
| c1, c2, c3 = st.columns(3) |
|
|
| c1.metric("IMS rows", f"{stats['ims_rows']:,}") |
|
|
| c2.metric("Merged rows", f"{stats['merged_rows']:,}") |
|
|
| c3.metric("Features", len(stats["feature_cols"])) |
|
|
| st.caption(f"IMS range: {stats['ims_date_min']} to {stats['ims_date_max']}") |
|
|
|
|
| with st.expander("What are the feature columns?"): |
|
|
| st.markdown( |
|
|
| "IMS weather variables and engineered time features used as " |
|
|
| "ML inputs. No on-site sensor data is included (strict separation to avoid leakage).\n\n" |
|
|
| "- **air_temperature_c, tdmax_c, tdmin_c:** Temperature from IMS station.\n" |
|
|
| "- **ghi_w_m2:** Global Horizontal Irradiance (solar radiation) \u2014 proxy for PAR.\n" |
|
|
| "- **rh_percent:** Relative humidity.\n" |
|
|
| "- **rain_mm:** Precipitation.\n" |
|
|
| "- **wind_speed_ms:** Wind speed.\n" |
|
|
| "- **hour_sin, hour_cos:** Cyclical encoding of hour-of-day.\n" |
|
|
| "- **doy_sin, doy_cos:** Cyclical encoding of day-of-year (seasonality)." |
|
|
| ) |
|
|
| merged = s2["merged"] |
|
|
| st.dataframe(merged.describe()) |
|
|
|
|
| if _HAS_PLOTLY and "A" in merged.columns: |
|
|
| with st.expander("About: Distribution of A (merged)"): |
|
|
| st.markdown( |
|
|
| "Distribution of A in the merged IMS+labels dataset. This is the subset " |
|
|
| "of Stage 1 labels that have matching IMS timestamps. Compare with Stage 1 " |
|
|
| "distribution to check for sampling bias." |
|
|
| ) |
|
|
| fig = px.histogram(merged["A"][merged["A"] >= 1].dropna(), nbins=50, title="Distribution of A (merged set, A \u2265 1)") |
|
|
| fig.update_layout(xaxis_title="A (\u00b5mol m\u207b\u00b2 s\u207b\u00b9)", xaxis_range=[1, None]) |
|
|
| st.plotly_chart(fig) |
|
|
|
|
| if _HAS_PLOTLY and merged is not None: |
|
|
| num_cols = [c for c in stats["feature_cols"] if c in merged.columns][:4] |
|
|
| if num_cols: |
|
|
| with st.expander("About: Feature distributions"): |
|
|
| st.markdown( |
|
|
| "Histograms of the first four numeric IMS features in the merged dataset. " |
|
|
| "Check for: reasonable value ranges, skewness, outliers, and missing-value " |
|
|
| "patterns that might affect model training." |
|
|
| ) |
|
|
| fig = make_subplots(rows=2, cols=2, subplot_titles=num_cols) |
|
|
| for i, col in enumerate(num_cols): |
|
|
| r, c = i // 2 + 1, i % 2 + 1 |
|
|
| fig.add_trace(go.Histogram(x=merged[col].dropna(), nbinsx=30), row=r, col=c) |
|
|
| fig.update_layout(title="Feature distributions (merged)") |
|
|
| st.plotly_chart(fig) |
|
|
|
|
| else: |
|
|
| |
|
|
| st.subheader("AI Data Engineering") |
|
|
| st.markdown( |
|
|
| "Gemini analyzes each sensor column's statistics against known physical constraints " |
|
|
| "for grapevines in the Negev desert, then returns per-column anomaly thresholds " |
|
|
| "(hard bounds + Z-score + IQR multiplier). The pipeline also generates five " |
|
|
| "engineered features fed directly into the ML prediction models." |
|
|
| ) |
|
|
|
|
| _llm_err = None |
|
|
| try: |
|
|
| from src.llm_data_engineer import LLMDataEngineer, SENSOR_CONTEXT |
|
|
|
|
| _loader_path = settings.SENSORS_WIDE_SAMPLE_PATH |
|
|
| if not _loader_path.exists(): |
|
|
| _loader_path = settings.SENSORS_WIDE_PATH |
|
|
| if not _loader_path.exists(): |
|
|
| raise FileNotFoundError("Sensor data file not found.") |
|
|
|
|
| _df_raw = pd.read_csv(_loader_path) |
|
|
| _engineer = LLMDataEngineer(verbose=False) |
|
|
| _key_cols = list(SENSOR_CONTEXT.keys()) |
|
|
| _present = [c for c in _key_cols if c in _df_raw.columns] |
|
|
|
|
| with st.spinner("Querying Gemini for anomaly thresholds…"): |
|
|
| _thresholds = _engineer.analyze_anomalies(_df_raw, columns=_present) |
|
|
|
|
| _df_clean = _engineer.apply_cleaning(_df_raw, _thresholds, strategy="clip") |
|
|
|
|
| with st.spinner("Querying Gemini for feature engineering spec…"): |
|
|
| _feat_spec = _engineer.get_feature_spec(list(_df_clean.columns)) |
|
|
|
|
| _df_eng = _engineer.engineer_features(_df_clean, feature_spec=_feat_spec) |
|
|
|
|
| |
|
|
| _viol_before, _viol_after = {}, {} |
|
|
| for _col, _t in _thresholds.items(): |
|
|
| if _col not in _df_raw.columns: |
|
|
| continue |
|
|
| _lo, _hi = _t.get("lower_bound"), _t.get("upper_bound") |
|
|
| _m = pd.Series(False, index=_df_raw.index) |
|
|
| if _lo is not None: |
|
|
| _m |= _df_raw[_col] < _lo |
|
|
| if _hi is not None: |
|
|
| _m |= _df_raw[_col] > _hi |
|
|
| _viol_before[_col] = int(_m.sum()) |
|
|
| _m2 = pd.Series(False, index=_df_clean.index) |
|
|
| if _lo is not None: |
|
|
| _m2 |= _df_clean[_col] < _lo |
|
|
| if _hi is not None: |
|
|
| _m2 |= _df_clean[_col] > _hi |
|
|
| _viol_after[_col] = int(_m2.sum()) |
|
|
|
|
| |
|
|
| _df_eng["_hr_local"] = (pd.to_datetime(_df_eng["time"], utc=True).dt.hour + 3) % 24 |
|
|
| _daytime = _df_eng[_df_eng["Air1_PAR_ref"] > 50] if "Air1_PAR_ref" in _df_eng.columns else _df_eng |
|
|
| _stress_profile = ( |
|
|
| _daytime.groupby("_hr_local")["stress_risk_score"].mean() |
|
|
| .reindex(range(24), fill_value=float("nan")) |
|
|
| ) |
|
|
|
|
| _used_gemini = "Statistical fallback" not in list(_thresholds.values())[0].get("rationale", "") |
|
|
|
|
| except Exception as _exc: |
|
|
| _llm_err = str(_exc) |
|
|
|
|
| if _llm_err: |
|
|
| st.error(f"Pipeline error: {_llm_err}") |
|
|
| else: |
|
|
| _source_badge = ( |
|
|
| "🤖 Thresholds sourced from **Gemini**" |
|
|
| if _used_gemini |
|
|
| else "⚙️ Thresholds from **statistical fallback** (set `GOOGLE_API_KEY` to enable Gemini)" |
|
|
| ) |
|
|
| st.caption(_source_badge) |
|
|
|
|
| |
|
|
# --- Threshold table: one row per sensor column that was analysed ---
st.markdown("#### Gemini anomaly thresholds")
with st.expander("How are thresholds generated?"):
    _thresholds_help = (
        "For each sensor column, `LLMDataEngineer` sends the full descriptive "
        "statistics (min, max, percentiles) plus domain context — physical units, "
        "expected range for the Negev site, known failure modes — to Gemini. "
        "Gemini returns a JSON with:\n\n"
        "- **Hard bounds** (`lower_bound` / `upper_bound`): values outside these are "
        "physically impossible or known sensor faults.\n"
        "- **Z-score threshold**: flags readings that deviate more than N standard "
        "deviations from the column mean.\n"
        "- **IQR multiplier**: flags readings outside Q1 − k·IQR … Q3 + k·IQR.\n\n"
        "A reading is flagged only when the hard-bound violation OR both the Z-score "
        "AND IQR conditions are met simultaneously. Default strategy is **clip** "
        "(clamp to bounds), preserving row count."
    )
    st.markdown(_thresholds_help)

# Display labels (with units) for the raw sensor column names; columns that
# are not listed here fall back to their raw name.
_col_labels = {
    "Air1_PAR_ref": "PAR (μmol/m²/s)",
    "Air1_leafTemperature_ref": "T_leaf (°C)",
    "Air1_airTemperature_ref": "T_air (°C)",
    "Air1_VPD_ref": "VPD (kPa)",
    "Air1_airHumidity_ref": "Humidity (%)",
    "Air1_CO2_ref": "CO₂ raw (ppm)",
}

# Table heading -> key in the per-column threshold dict. A key that is
# missing from the dict is rendered as an em dash.
_threshold_fields = (
    ("Lower bound", "lower_bound"),
    ("Upper bound", "upper_bound"),
    ("Z-score σ", "zscore_threshold"),
    ("IQR ×", "iqr_multiplier"),
)

_thresh_rows = []
for _col in _present:
    _t = _thresholds.get(_col, {})
    _row = {"Sensor": _col_labels.get(_col, _col)}
    for _heading, _key in _threshold_fields:
        _row[_heading] = _t.get(_key, "—")
    # Violation counts default to 0 for columns that were never tallied.
    _row["Violations (raw)"] = _viol_before.get(_col, 0)
    _row["After clip"] = _viol_after.get(_col, 0)
    _row["Rationale"] = _t.get("rationale", "")
    _thresh_rows.append(_row)

_thresh_df = pd.DataFrame(_thresh_rows)
st.dataframe(_thresh_df, hide_index=True)
|
|
|
|
| |
|
|
# --- Cleaning summary: headline numbers for the clip-based cleaning pass ---
st.markdown("#### Cleaning summary")
_total_viol = sum(_viol_before.values())

# Count the columns that have no violations left after cleaning. The previous
# expression summed the zero-valued counts themselves, so it was always 0.
_total_cleared = sum(1 for _remaining in _viol_after.values() if _remaining == 0)

# Guard the ratio so an empty raw frame shows 0.0% instead of raising
# ZeroDivisionError.
_n_raw = len(_df_raw)
_pct_retained = len(_df_clean) / _n_raw * 100 if _n_raw else 0.0

_cm1, _cm2, _cm3, _cm4 = st.columns(4)
_cm1.metric("Rows in dataset", f"{_n_raw:,}")
_cm2.metric("Physical violations found", str(_total_viol))
_cm3.metric("Columns fully cleared", f"{_total_cleared} / {len(_viol_before)}")
_cm4.metric("Rows retained (clip)", f"{_pct_retained:.1f}%")
|
|
|
|
# --- Bar chart of raw violation counts, limited to sensors that have any ---
if _HAS_PLOTLY and _total_viol > 0:
    # NOTE(review): the module-level plotly imports visible at the top of the
    # file provide only `go` and `make_subplots`, not `px`. Importing under a
    # private alias guarantees the name resolves here without rebinding `px`
    # for the rest of the enclosing function.
    import plotly.express as _px

    # Filter once, then derive labels and values from the same mapping so the
    # two lists cannot drift apart.
    _flagged = {_c: _n for _c, _n in _viol_before.items() if _n > 0}
    _viol_cols = [_col_labels.get(_c, _c) for _c in _flagged]
    _viol_vals = list(_flagged.values())

    _fig_viol = _px.bar(
        x=_viol_cols,
        y=_viol_vals,
        labels={"x": "Sensor", "y": "Violation count"},
        title="Physical violations by sensor (before cleaning)",
        color_discrete_sequence=[_BRAND_GREEN],
    )
    _fig_viol.update_layout(height=300)
    st.plotly_chart(_fig_viol)
|
|
|
|
| |
|
|
# --- Raw vs cleaned histograms for PAR and VPD, shown side by side ---
if _HAS_PLOTLY:
    st.markdown("#### Before vs after cleaning — PAR & VPD")
    with st.expander("What to look for"):
        _hist_help = (
            "The **raw** histogram (red) includes all sensor readings. "
            "The **cleaned** histogram (green) shows the same column after "
            "the Gemini-generated thresholds are applied. Outlier spikes at the "
            "far right of PAR and VPD should disappear or be clipped to the bound."
        )
        st.markdown(_hist_help)

    _ba_cols = st.columns(2)
    # The enumerate index picks the layout slot, so each sensor keeps its own
    # side even when the other column is absent from the data.
    for _idx, _col in enumerate(["Air1_PAR_ref", "Air1_VPD_ref"]):
        if _col not in _df_raw.columns:
            continue

        _label = _col_labels.get(_col, _col)
        _raw_s = _df_raw[_col].dropna()
        _clean_s = _df_clean[_col].dropna()

        # Overlay both distributions: raw first, cleaned on top.
        _fig_ba = go.Figure()
        for _series, _trace_name, _colour, _alpha in (
            (_raw_s, "Raw", "crimson", 0.55),
            (_clean_s, "Cleaned", _BRAND_GREEN, 0.7),
        ):
            _fig_ba.add_trace(
                go.Histogram(
                    x=_series,
                    nbinsx=60,
                    name=_trace_name,
                    marker_color=_colour,
                    opacity=_alpha,
                )
            )

        # Mark the upper bound when the thresholds supply one.
        _hi_bound = _thresholds.get(_col, {}).get("upper_bound")
        if _hi_bound is not None:
            _fig_ba.add_vline(
                x=_hi_bound,
                line_dash="dash",
                line_color="orange",
                annotation_text=f"bound={_hi_bound}",
            )

        _fig_ba.update_layout(
            barmode="overlay",
            title=f"{_label} — raw vs cleaned",
            xaxis_title=_label,
            height=320,
        )
        with _ba_cols[_idx]:
            st.plotly_chart(_fig_ba)
|
|
|
|
| |
|
|
# --- Engineered features: explanation, feature-spec values, summary stats ---
st.markdown("#### Engineered features")
with st.expander("How are features engineered?"):
    _features_help = (
        "After cleaning, the pipeline asks Gemini to confirm the optimal weights "
        "and normalisation bounds for the **Stress Risk Score**, given the available "
        "sensor columns and the Semillon grapevine stress physiology. "
        "It then computes five new columns:\n\n"
        "| Feature | Formula | Purpose |\n"
        "|---|---|---|\n"
        "| `hour_sin` | sin(2π·h/24) | Cyclical hour-of-day |\n"
        "| `hour_cos` | cos(2π·h/24) | Cyclical hour-of-day |\n"
        "| `doy_sin` | sin(2π·d/365) | Seasonal position |\n"
        "| `doy_cos` | cos(2π·d/365) | Seasonal position |\n"
        "| `stress_risk_score` | w_VPD·norm(VPD) + w_CWSI·norm(CWSI) | Acute stress in [0, 1] |\n\n"
        "Cyclical encodings ensure that midnight→01:00 and 23:00→midnight are "
        "treated as equally close by the model — something a raw hour integer cannot do."
    )
    st.markdown(_features_help)

# Values read from the feature spec; a missing key is shown as an em dash.
_fs_vpd_w, _fs_cwsi_w, _fs_vpd_clip = (
    _feat_spec.get(_key, "—")
    for _key in ("vpd_weight", "cwsi_weight", "vpd_clip_max")
)
_fs_rat = _feat_spec.get("rationale", "")

_fc1, _fc2, _fc3 = st.columns(3)
_fc1.metric("VPD weight", _fs_vpd_w)
_fc2.metric("CWSI weight", _fs_cwsi_w)
_fc3.metric("VPD clip max (kPa)", _fs_vpd_clip)
st.caption(f"Gemini rationale: {_fs_rat}")

# Min / mean / max of whichever engineered columns are present in the frame.
_eng_feat_cols = ["hour_sin", "hour_cos", "doy_sin", "doy_cos", "stress_risk_score"]
_available_feats = [_c for _c in _eng_feat_cols if _c in _df_eng.columns]
_described = _df_eng[_available_feats].describe(percentiles=[0.25, 0.5, 0.75])
_feat_stats = _described.loc[["min", "mean", "max"]].round(4)
st.dataframe(_feat_stats)
|
|
|
|
| |
|
|
# --- Hourly stress profile: mean stress score per local hour ---
if _HAS_PLOTLY and "stress_risk_score" in _df_eng.columns:
    st.markdown("#### Daytime stress profile")
    with st.expander("How to read this chart"):
        st.markdown(
            "Mean **Stress Risk Score** per local hour (Israel = UTC+3), "
            "computed over all daytime readings (PAR > 50 μmol m⁻² s⁻¹). "
            "A score of 1.0 means the vine is under maximum atmospheric demand; "
            "0.0 means no stress. The midday–afternoon peak is the primary "
            "window where SolarWine shading interventions are concentrated."
        )

    # Hours without any reading are NaN in the profile; drop them before plotting.
    _profile_df = _stress_profile.dropna().reset_index()
    _profile_df.columns = ["Hour (local)", "Stress Risk Score"]

    if _profile_df.empty:
        # idxmax() raises on an empty or all-NaN profile, and this code runs
        # outside the pipeline's try/except, so report the gap instead.
        st.info("No daytime stress readings are available to plot.")
    else:
        # NOTE(review): the module-level plotly imports visible at the top of
        # the file provide only `go` and `make_subplots`, not `px`. A private
        # alias guarantees the name resolves without rebinding `px` for the
        # rest of the enclosing function.
        import plotly.express as _px

        _peak_row = _profile_df["Stress Risk Score"].idxmax()
        _peak_hr = int(_profile_df.loc[_peak_row, "Hour (local)"])

        _fig_stress = _px.bar(
            _profile_df,
            x="Hour (local)",
            y="Stress Risk Score",
            color="Stress Risk Score",
            color_continuous_scale=["#00BD3E", "#f5c518", "#e63946"],
            range_y=[0, 1],
            title=f"Hourly stress profile — peak at {_peak_hr:02d}:00 local",
        )
        _fig_stress.add_hline(
            y=0.5,
            line_dash="dot",
            line_color="orange",
            annotation_text="Intervention threshold (0.5)",
        )
        _fig_stress.update_layout(
            xaxis=dict(tickmode="linear", dtick=1),
            coloraxis_showscale=False,
            height=380,
        )
        st.plotly_chart(_fig_stress)

        # Label-based slice: hours 6 through 9 inclusive. The mean is NaN when
        # none of those hours has data, so show "n/a" rather than "nan".
        _morning_mean = _stress_profile.loc[6:9].mean()
        _morning_txt = f"{_morning_mean:.3f}" if pd.notna(_morning_mean) else "n/a"
        st.caption(
            f"Peak stress: {_peak_hr:02d}:00 local "
            f"(score = {_stress_profile.max():.3f}). "
            f"Low-stress morning window (before 10:00): "
            f"mean score = {_morning_txt} — shading withheld."
        )
|
|
|
|
| |
|
|
| |
| |
| |
|
|