From 1dca00d5e3b99ecc685219fbbaa867db53fd6af9 Mon Sep 17 00:00:00 2001
From: Davide Scaini
Date: Tue, 2 Jun 2026 15:47:55 +0200
Subject: [PATCH] update stats script

Persist weekly feature-usage counts in a CSV next to the figure so the
chart can cover more weeks than the nginx logs currently on disk.

- add HISTORY / MAX_WEEKS constants and a --history option
- add load_history() / save_history()
- compute the weekly feature table in main(), merge it with the stored
  history and pass the result to make_figure()

---
Review notes: hunks 2 and 5 were amended (history CSV now round-trips
through the "week_start" column; overlapping weeks keep the larger count;
missing feature columns are zero-filled). The commit id and the post-image
blob id on the index line predate these amendments.

 scripts/usage_stats.py | 74 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 65 insertions(+), 9 deletions(-)

diff --git a/scripts/usage_stats.py b/scripts/usage_stats.py
index 7e22f93..a3f88e7 100644
--- a/scripts/usage_stats.py
+++ b/scripts/usage_stats.py
@@ -32,6 +32,8 @@ import pandas as pd
 LOG_DIR = Path("/var/log/nginx")
 OUTPUT_DIR = Path("/var/bincio/stats")
 OUTPUT = OUTPUT_DIR / "latest.png"
+HISTORY = OUTPUT_DIR / "weekly_history.csv"
+MAX_WEEKS = 26  # 6 months
 
 # ── Log parsing ───────────────────────────────────────────────────────────────
 
@@ -116,6 +118,35 @@ def _feature(referer: str) -> str | None:
             return label
     return None
 
+# ── History management ────────────────────────────────────────────────────────
+
+def load_history(history_file: Path) -> pd.DataFrame:
+    """Load the stored weekly feature counts from *history_file*.
+
+    Returns a frame indexed by ``week_start`` with one column per feature.
+    A missing or unreadable file gives an empty frame (with a warning on
+    stderr for the unreadable case) so the figure can still be generated.
+    """
+    if not history_file.exists():
+        return pd.DataFrame()
+    try:
+        df = pd.read_csv(history_file, parse_dates=["week_start"])
+    except (OSError, ValueError) as e:
+        # pandas' parser/empty-file errors are ValueError subclasses
+        print(f"Warning: could not load history: {e}", file=sys.stderr)
+        return pd.DataFrame()
+    return df.set_index("week_start")
+
+def save_history(history: pd.DataFrame, history_file: Path) -> None:
+    """Write the weekly feature counts to *history_file* as CSV.
+
+    The index is always written as a ``week_start`` column because that is
+    the name load_history() parses; frames built from the log grouping carry
+    the index name ``ts`` and would otherwise produce an unreadable file.
+    """
+    history_file.parent.mkdir(parents=True, exist_ok=True)
+    history.rename_axis("week_start").reset_index().to_csv(history_file, index=False)
+
 # ── Loading ───────────────────────────────────────────────────────────────────
 
 def load_logs(log_dir: Path) -> pd.DataFrame:
@@ -176,19 +207,12 @@ def _style_ax(ax: plt.Axes) -> None:
     for spine in ax.spines.values():
         spine.set_edgecolor(GRID)
 
-def make_figure(df: pd.DataFrame, output: Path) -> None:
+def make_figure(df: pd.DataFrame, feat_weekly: pd.DataFrame, output: Path) -> None:
     # ── daily logins ──────────────────────────────────────────────────────────
     login_mask = (df["method"] == "POST") & (df["path"] == "/api/auth/login") & (df["status"] == 200)
     weekly_logins = df[login_mask].set_index("ts").resample("W-MON").size()
 
     # ── feature usage (weekly) ────────────────────────────────────────────────
-    feat_df = df[df["feature"].notna()].copy()
-    feat_weekly = (
-        feat_df.set_index("ts")
-        .groupby([pd.Grouper(freq="W-MON"), "feature"])
-        .size()
-        .unstack(fill_value=0)
-    )
     feat_order = [f for f in FEATURE_COLORS if f in feat_weekly.columns]
     feat_weekly = feat_weekly[feat_order]
 
@@ -286,6 +310,7 @@ def main() -> None:
     ap = argparse.ArgumentParser(description="Generate bincio usage stats figure.")
     ap.add_argument("--log-dir", type=Path, default=LOG_DIR, metavar="DIR")
     ap.add_argument("--output", type=Path, default=OUTPUT, metavar="FILE")
+    ap.add_argument("--history", type=Path, default=HISTORY, metavar="FILE")
     args = ap.parse_args()
 
     print("Loading logs…", file=sys.stderr)
@@ -296,7 +321,38 @@ def main() -> None:
     print(" Feature breakdown:", file=sys.stderr)
     for feat, count in feat_counts.items():
         print(f" {str(feat):12s} {count:,}", file=sys.stderr)
-    make_figure(df, args.output)
+
+    # Weekly feature usage for every week covered by the current logs
+    feat_df = df[df["feature"].notna()].copy()
+    current_weekly = (
+        feat_df.set_index("ts")
+        .groupby([pd.Grouper(freq="W-MON"), "feature"])
+        .size()
+        .unstack(fill_value=0)
+    )
+
+    # Merge with the stored history. A week can appear in both sources (a
+    # rerun, or a week whose logs are partly rotated away); keep the larger
+    # count per feature so a partial recount never lowers a stored total.
+    history = load_history(args.history)
+    if history.empty:
+        all_weekly = current_weekly
+    else:
+        all_weekly = (
+            pd.concat([history, current_weekly])
+            .fillna(0)  # a feature missing from one source means zero hits
+            .groupby(level=0)
+            .max()
+            .astype(int)
+        )
+
+    # Keep only the most recent MAX_WEEKS rows (index is sorted by week)
+    all_weekly = all_weekly.iloc[-MAX_WEEKS:]
+
+    # Save updated history (all data we kept)
+    save_history(all_weekly, args.history)
+
+    make_figure(df, all_weekly, args.output)
 
 
 if __name__ == "__main__":