update stats script
This commit is contained in:
+60
-9
@@ -32,6 +32,8 @@ import pandas as pd
|
||||
LOG_DIR = Path("/var/log/nginx")
|
||||
OUTPUT_DIR = Path("/var/bincio/stats")
|
||||
OUTPUT = OUTPUT_DIR / "latest.png"
|
||||
HISTORY = OUTPUT_DIR / "weekly_history.csv"
|
||||
MAX_WEEKS = 26 # 6 months
|
||||
|
||||
# ── Log parsing ───────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -116,6 +118,22 @@ def _feature(referer: str) -> str | None:
|
||||
return label
|
||||
return None
|
||||
|
||||
# ── History management ────────────────────────────────────────────────────────
|
||||
|
||||
def load_history(history_file: Path) -> pd.DataFrame:
    """Load the persisted weekly feature history from a CSV file.

    Args:
        history_file: Path of the CSV file holding one row per week.

    Returns:
        A frame indexed by a datetime index named ``week_start``.  An empty
        ``pd.DataFrame()`` is returned when the file does not exist or cannot
        be read; in the latter case a warning is printed to stderr.
    """
    if not history_file.exists():
        return pd.DataFrame()

    try:
        df = pd.read_csv(history_file)
        # The week column is normally called "week_start".  A frame that was
        # saved with a differently named (or unnamed) index ends up with
        # another header such as "ts" or "index"; reset_index() always puts
        # the former index first, so fall back to the first column instead of
        # discarding the whole history.
        week_col = "week_start" if "week_start" in df.columns else df.columns[0]
        df[week_col] = pd.to_datetime(df[week_col])
        return df.set_index(week_col).rename_axis(index="week_start")
    except Exception as e:
        # Best effort: a corrupt or unreadable history file must not abort the
        # run, so report the problem and continue without history.
        print(f"Warning: could not load history: {e}", file=sys.stderr)
        return pd.DataFrame()
|
||||
|
||||
def save_history(history: pd.DataFrame, history_file: Path) -> None:
    """Write the weekly history to a CSV file, creating parent directories.

    Args:
        history: Frame with one row per week, indexed by the week timestamp.
        history_file: Destination CSV path; it is overwritten if it exists.
    """
    history_file.parent.mkdir(parents=True, exist_ok=True)
    # Always store the index under the "week_start" header, whatever name the
    # index carries in memory (e.g. "ts" after a groupby, or None after a
    # concat of differently named indexes), so the file can be read back by
    # code that looks up the "week_start" column.
    history.rename_axis(index="week_start").reset_index().to_csv(
        history_file, index=False
    )
|
||||
|
||||
# ── Loading ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def load_logs(log_dir: Path) -> pd.DataFrame:
|
||||
@@ -176,19 +194,12 @@ def _style_ax(ax: plt.Axes) -> None:
|
||||
for spine in ax.spines.values():
|
||||
spine.set_edgecolor(GRID)
|
||||
|
||||
def make_figure(df: pd.DataFrame, output: Path) -> None:
|
||||
def make_figure(df: pd.DataFrame, feat_weekly: pd.DataFrame, output: Path) -> None:
|
||||
# ── daily logins ──────────────────────────────────────────────────────────
|
||||
login_mask = (df["method"] == "POST") & (df["path"] == "/api/auth/login") & (df["status"] == 200)
|
||||
weekly_logins = df[login_mask].set_index("ts").resample("W-MON").size()
|
||||
|
||||
# ── feature usage (weekly) ────────────────────────────────────────────────
|
||||
feat_df = df[df["feature"].notna()].copy()
|
||||
feat_weekly = (
|
||||
feat_df.set_index("ts")
|
||||
.groupby([pd.Grouper(freq="W-MON"), "feature"])
|
||||
.size()
|
||||
.unstack(fill_value=0)
|
||||
)
|
||||
feat_order = [f for f in FEATURE_COLORS if f in feat_weekly.columns]
|
||||
feat_weekly = feat_weekly[feat_order]
|
||||
|
||||
@@ -286,6 +297,7 @@ def main() -> None:
|
||||
ap = argparse.ArgumentParser(description="Generate bincio usage stats figure.")
|
||||
ap.add_argument("--log-dir", type=Path, default=LOG_DIR, metavar="DIR")
|
||||
ap.add_argument("--output", type=Path, default=OUTPUT, metavar="FILE")
|
||||
ap.add_argument("--history", type=Path, default=HISTORY, metavar="FILE")
|
||||
args = ap.parse_args()
|
||||
|
||||
print("Loading logs…", file=sys.stderr)
|
||||
@@ -296,7 +308,46 @@ def main() -> None:
|
||||
print(" Feature breakdown:", file=sys.stderr)
|
||||
for feat, count in feat_counts.items():
|
||||
print(f" {str(feat):12s} {count:,}", file=sys.stderr)
|
||||
make_figure(df, args.output)
|
||||
|
||||
# Aggregate weekly feature usage for every week covered by the loaded logs
|
||||
feat_df = df[df["feature"].notna()].copy()
|
||||
current_weekly = (
|
||||
feat_df.set_index("ts")
|
||||
.groupby([pd.Grouper(freq="W-MON"), "feature"])
|
||||
.size()
|
||||
.unstack(fill_value=0)
|
||||
)
|
||||
|
||||
# Load historical data and merge
|
||||
history = load_history(args.history)
|
||||
if not history.empty:
|
||||
# Remove any weeks that overlap with current logs (in case of reruns)
|
||||
if len(current_weekly) > 0:
|
||||
latest_week_in_history = history.index.max()
|
||||
earliest_week_in_current = current_weekly.index.min()
|
||||
if latest_week_in_history >= earliest_week_in_current:
|
||||
history = history[history.index < earliest_week_in_current]
|
||||
|
||||
# Concatenate and drop duplicates
|
||||
all_weekly = pd.concat([history, current_weekly])
|
||||
all_weekly = all_weekly[~all_weekly.index.duplicated(keep='first')]
|
||||
else:
|
||||
all_weekly = current_weekly
|
||||
|
||||
# Keep only last MAX_WEEKS weeks
|
||||
if len(all_weekly) > MAX_WEEKS:
|
||||
all_weekly = all_weekly.iloc[-MAX_WEEKS:]
|
||||
|
||||
# Save updated history (all data we kept)
|
||||
save_history(all_weekly, args.history)
|
||||
|
||||
# Ensure all feature columns exist in all_weekly
|
||||
feat_order = [f for f in FEATURE_COLORS if f in all_weekly.columns]
|
||||
for feat in feat_order:
|
||||
if feat not in all_weekly.columns:
|
||||
all_weekly[feat] = 0
|
||||
|
||||
make_figure(df, all_weekly, args.output)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user