update stats script

This commit is contained in:
Davide Scaini
2026-06-02 15:47:55 +02:00
parent fa61801580
commit 1dca00d5e3
+60 -9
View File
@@ -32,6 +32,8 @@ import pandas as pd
LOG_DIR = Path("/var/log/nginx")
OUTPUT_DIR = Path("/var/bincio/stats")
OUTPUT = OUTPUT_DIR / "latest.png"
HISTORY = OUTPUT_DIR / "weekly_history.csv"
MAX_WEEKS = 26 # 6 months
# ── Log parsing ───────────────────────────────────────────────────────────────
@@ -116,6 +118,22 @@ def _feature(referer: str) -> str | None:
return label
return None
# ── History management ────────────────────────────────────────────────────────
def load_history(history_file: Path) -> pd.DataFrame:
    """Load the persisted weekly feature-usage table.

    The first CSV column is taken as the week index and is always exposed
    under the name ``week_start``; the remaining columns are returned as-is.
    Reading the index by position rather than by header keeps files readable
    when they were written from a frame whose index carried another name
    (for example ``ts`` or pandas' default ``index``), which would otherwise
    make every load fail and silently discard the history.

    Args:
        history_file: Path of the CSV file to read.

    Returns:
        A DataFrame indexed by ``week_start``. An empty DataFrame is returned
        when the file does not exist or cannot be read/parsed.
    """
    if not history_file.exists():
        return pd.DataFrame()
    try:
        # index_col=0 + parse_dates=True: parse the first column as dates and
        # use it as the index, whatever its header is.
        df = pd.read_csv(history_file, index_col=0, parse_dates=True)
    except Exception as e:
        # Best effort: a corrupt history must not prevent generating the
        # figure from the current logs, so warn and start from scratch.
        print(f"Warning: could not load history: {e}", file=sys.stderr)
        return pd.DataFrame()
    return df.rename_axis(index="week_start")
def save_history(history: pd.DataFrame, history_file: Path) -> None:
    """Write the weekly feature-usage table to ``history_file`` as CSV.

    The index is always serialized as a column named ``week_start``,
    regardless of the name it carries in memory. Without this the column
    would be written under the index's current name (``ts`` for a table
    grouped on the ``ts`` column, ``index`` when the index is unnamed), and a
    reader that looks the column up as ``week_start`` could not find it.

    Args:
        history: Weekly table indexed by week; it is not modified.
        history_file: Destination CSV path. Missing parent directories are
            created.
    """
    history_file.parent.mkdir(parents=True, exist_ok=True)
    # rename_axis returns a new frame, so the caller's index name is untouched.
    named = history.rename_axis(index="week_start")
    named.reset_index().to_csv(history_file, index=False)
# ── Loading ───────────────────────────────────────────────────────────────────
def load_logs(log_dir: Path) -> pd.DataFrame:
@@ -176,19 +194,12 @@ def _style_ax(ax: plt.Axes) -> None:
for spine in ax.spines.values():
spine.set_edgecolor(GRID)
def make_figure(df: pd.DataFrame, output: Path) -> None:
def make_figure(df: pd.DataFrame, feat_weekly: pd.DataFrame, output: Path) -> None:
# ── weekly logins ─────────────────────────────────────────────────────────
login_mask = (df["method"] == "POST") & (df["path"] == "/api/auth/login") & (df["status"] == 200)
weekly_logins = df[login_mask].set_index("ts").resample("W-MON").size()
# ── feature usage (weekly) ────────────────────────────────────────────────
feat_df = df[df["feature"].notna()].copy()
feat_weekly = (
feat_df.set_index("ts")
.groupby([pd.Grouper(freq="W-MON"), "feature"])
.size()
.unstack(fill_value=0)
)
feat_order = [f for f in FEATURE_COLORS if f in feat_weekly.columns]
feat_weekly = feat_weekly[feat_order]
@@ -286,6 +297,7 @@ def main() -> None:
ap = argparse.ArgumentParser(description="Generate bincio usage stats figure.")
ap.add_argument("--log-dir", type=Path, default=LOG_DIR, metavar="DIR")
ap.add_argument("--output", type=Path, default=OUTPUT, metavar="FILE")
ap.add_argument("--history", type=Path, default=HISTORY, metavar="FILE")
args = ap.parse_args()
print("Loading logs…", file=sys.stderr)
@@ -296,7 +308,46 @@ def main() -> None:
print(" Feature breakdown:", file=sys.stderr)
for feat, count in feat_counts.items():
print(f" {str(feat):12s} {count:,}", file=sys.stderr)
make_figure(df, args.output)
# Aggregate feature usage per week for every week present in the current logs
feat_df = df[df["feature"].notna()].copy()
current_weekly = (
feat_df.set_index("ts")
.groupby([pd.Grouper(freq="W-MON"), "feature"])
.size()
.unstack(fill_value=0)
)
# Load historical data and merge
history = load_history(args.history)
if not history.empty:
# Remove any weeks that overlap with current logs (in case of reruns)
if len(current_weekly) > 0:
latest_week_in_history = history.index.max()
earliest_week_in_current = current_weekly.index.min()
if latest_week_in_history >= earliest_week_in_current:
history = history[history.index < earliest_week_in_current]
# Concatenate and drop duplicates
all_weekly = pd.concat([history, current_weekly])
all_weekly = all_weekly[~all_weekly.index.duplicated(keep='first')]
else:
all_weekly = current_weekly
# Keep only last MAX_WEEKS weeks
if len(all_weekly) > MAX_WEEKS:
all_weekly = all_weekly.iloc[-MAX_WEEKS:]
# Save updated history (all data we kept)
save_history(all_weekly, args.history)
# Ensure all feature columns exist in all_weekly
feat_order = [f for f in FEATURE_COLORS if f in all_weekly.columns]
for feat in feat_order:
if feat not in all_weekly.columns:
all_weekly[feat] = 0
make_figure(df, all_weekly, args.output)
if __name__ == "__main__":