update stats script
This commit is contained in:
+60
-9
@@ -32,6 +32,8 @@ import pandas as pd
|
|||||||
LOG_DIR = Path("/var/log/nginx")
|
LOG_DIR = Path("/var/log/nginx")
|
||||||
OUTPUT_DIR = Path("/var/bincio/stats")
|
OUTPUT_DIR = Path("/var/bincio/stats")
|
||||||
OUTPUT = OUTPUT_DIR / "latest.png"
|
OUTPUT = OUTPUT_DIR / "latest.png"
|
||||||
|
HISTORY = OUTPUT_DIR / "weekly_history.csv"
|
||||||
|
MAX_WEEKS = 26 # 6 months
|
||||||
|
|
||||||
# ── Log parsing ───────────────────────────────────────────────────────────────
|
# ── Log parsing ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -116,6 +118,22 @@ def _feature(referer: str) -> str | None:
|
|||||||
return label
|
return label
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# ── History management ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def load_history(history_file: Path) -> pd.DataFrame:
    """Read the weekly history CSV, indexed by its ``week_start`` column.

    Returns an empty DataFrame when *history_file* does not exist or when it
    cannot be read/parsed; in the latter case a warning is printed to stderr
    instead of raising, so a damaged history file never aborts the run.
    """
    # Nothing saved yet: start from an empty history.
    if not history_file.exists():
        return pd.DataFrame()

    try:
        frame = pd.read_csv(history_file, parse_dates=["week_start"])
        indexed = frame.set_index("week_start")
    except Exception as e:
        # Best effort: report the problem and carry on without history.
        print(f"Warning: could not load history: {e}", file=sys.stderr)
        return pd.DataFrame()
    return indexed
|
||||||
|
|
||||||
|
def save_history(history: pd.DataFrame, history_file: Path) -> None:
    """Write the weekly history table to *history_file* as CSV.

    The parent directory is created if needed. The index is always written
    as a column named ``week_start``, regardless of the name it carries in
    *history*, because ``load_history`` reads the file back with
    ``parse_dates=["week_start"]`` and re-indexes on that column.

    Args:
        history: Weekly table whose index holds the week timestamps.
        history_file: Destination CSV path.
    """
    history_file.parent.mkdir(parents=True, exist_ok=True)
    # The index may be named after the source timestamp column or be unnamed
    # (e.g. after concatenating frames with differently named indexes).
    # Normalise it so the file written here can be loaded on the next run.
    normalised = history.rename_axis("week_start")
    normalised.reset_index().to_csv(history_file, index=False)
|
||||||
|
|
||||||
# ── Loading ───────────────────────────────────────────────────────────────────
|
# ── Loading ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
def load_logs(log_dir: Path) -> pd.DataFrame:
|
def load_logs(log_dir: Path) -> pd.DataFrame:
|
||||||
@@ -176,19 +194,12 @@ def _style_ax(ax: plt.Axes) -> None:
|
|||||||
for spine in ax.spines.values():
|
for spine in ax.spines.values():
|
||||||
spine.set_edgecolor(GRID)
|
spine.set_edgecolor(GRID)
|
||||||
|
|
||||||
def make_figure(df: pd.DataFrame, output: Path) -> None:
|
def make_figure(df: pd.DataFrame, feat_weekly: pd.DataFrame, output: Path) -> None:
|
||||||
# ── daily logins ──────────────────────────────────────────────────────────
|
# ── daily logins ──────────────────────────────────────────────────────────
|
||||||
login_mask = (df["method"] == "POST") & (df["path"] == "/api/auth/login") & (df["status"] == 200)
|
login_mask = (df["method"] == "POST") & (df["path"] == "/api/auth/login") & (df["status"] == 200)
|
||||||
weekly_logins = df[login_mask].set_index("ts").resample("W-MON").size()
|
weekly_logins = df[login_mask].set_index("ts").resample("W-MON").size()
|
||||||
|
|
||||||
# ── feature usage (weekly) ────────────────────────────────────────────────
|
# ── feature usage (weekly) ────────────────────────────────────────────────
|
||||||
feat_df = df[df["feature"].notna()].copy()
|
|
||||||
feat_weekly = (
|
|
||||||
feat_df.set_index("ts")
|
|
||||||
.groupby([pd.Grouper(freq="W-MON"), "feature"])
|
|
||||||
.size()
|
|
||||||
.unstack(fill_value=0)
|
|
||||||
)
|
|
||||||
feat_order = [f for f in FEATURE_COLORS if f in feat_weekly.columns]
|
feat_order = [f for f in FEATURE_COLORS if f in feat_weekly.columns]
|
||||||
feat_weekly = feat_weekly[feat_order]
|
feat_weekly = feat_weekly[feat_order]
|
||||||
|
|
||||||
@@ -286,6 +297,7 @@ def main() -> None:
|
|||||||
ap = argparse.ArgumentParser(description="Generate bincio usage stats figure.")
|
ap = argparse.ArgumentParser(description="Generate bincio usage stats figure.")
|
||||||
ap.add_argument("--log-dir", type=Path, default=LOG_DIR, metavar="DIR")
|
ap.add_argument("--log-dir", type=Path, default=LOG_DIR, metavar="DIR")
|
||||||
ap.add_argument("--output", type=Path, default=OUTPUT, metavar="FILE")
|
ap.add_argument("--output", type=Path, default=OUTPUT, metavar="FILE")
|
||||||
|
ap.add_argument("--history", type=Path, default=HISTORY, metavar="FILE")
|
||||||
args = ap.parse_args()
|
args = ap.parse_args()
|
||||||
|
|
||||||
print("Loading logs…", file=sys.stderr)
|
print("Loading logs…", file=sys.stderr)
|
||||||
@@ -296,7 +308,46 @@ def main() -> None:
|
|||||||
print(" Feature breakdown:", file=sys.stderr)
|
print(" Feature breakdown:", file=sys.stderr)
|
||||||
for feat, count in feat_counts.items():
|
for feat, count in feat_counts.items():
|
||||||
print(f" {str(feat):12s} {count:,}", file=sys.stderr)
|
print(f" {str(feat):12s} {count:,}", file=sys.stderr)
|
||||||
make_figure(df, args.output)
|
|
||||||
|
# Compute weekly feature usage for every week present in the current logs
|
||||||
|
feat_df = df[df["feature"].notna()].copy()
|
||||||
|
current_weekly = (
|
||||||
|
feat_df.set_index("ts")
|
||||||
|
.groupby([pd.Grouper(freq="W-MON"), "feature"])
|
||||||
|
.size()
|
||||||
|
.unstack(fill_value=0)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Load historical data and merge
|
||||||
|
history = load_history(args.history)
|
||||||
|
if not history.empty:
|
||||||
|
# Remove any weeks that overlap with current logs (in case of reruns)
|
||||||
|
if len(current_weekly) > 0:
|
||||||
|
latest_week_in_history = history.index.max()
|
||||||
|
earliest_week_in_current = current_weekly.index.min()
|
||||||
|
if latest_week_in_history >= earliest_week_in_current:
|
||||||
|
history = history[history.index < earliest_week_in_current]
|
||||||
|
|
||||||
|
# Concatenate and drop duplicates
|
||||||
|
all_weekly = pd.concat([history, current_weekly])
|
||||||
|
all_weekly = all_weekly[~all_weekly.index.duplicated(keep='first')]
|
||||||
|
else:
|
||||||
|
all_weekly = current_weekly
|
||||||
|
|
||||||
|
# Keep only last MAX_WEEKS weeks
|
||||||
|
if len(all_weekly) > MAX_WEEKS:
|
||||||
|
all_weekly = all_weekly.iloc[-MAX_WEEKS:]
|
||||||
|
|
||||||
|
# Save updated history (all data we kept)
|
||||||
|
save_history(all_weekly, args.history)
|
||||||
|
|
||||||
|
# Ensure all feature columns exist in all_weekly
|
||||||
|
feat_order = [f for f in FEATURE_COLORS if f in all_weekly.columns]
|
||||||
|
for feat in feat_order:
|
||||||
|
if feat not in all_weekly.columns:
|
||||||
|
all_weekly[feat] = 0
|
||||||
|
|
||||||
|
make_figure(df, all_weekly, args.output)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user