Code part 4🥂🍾
for i in range(total):
subj_id = i 1
if subj_id in completed:
continue
A, group = datasets[i]
print(f" {subj_id}/{total} {group} ...", end=" ", flush=True)
res = analyze_subject(A, group, subj_id, sigmas, params)
results.append(res)
completed.add(subj_id)
processed_counter = 1
if len(completed) % args.batch_size == 0 or (subj_id == total):
save_checkpoint_json(completed, args.output_dir)
partial_df = pd.DataFrame(results)
if os.path.exists(partial_path):
try:
old_df =
pd.read_csv(partial_path)
partial_df = pd.concat([old_df, partial_df], ignore_index=True)
partial_df = partial_df.drop_duplicates(subset=["subject"], keep="last")
except Exception:
pass
atomic_csv_save(partial_df, partial_path)
# Report progress based on how many subjects were processed in THIS run
if processed_counter > 0 and processed_counter % 20 == 0:
elapsed = time.time() - t0
est = (elapsed / processed_counter) * (total - len(completed))
print(f"done | Est. remaining: {str(timedelta(seconds=int(est)))}")
else:
print("done")
final_df = pd.DataFrame(results)
if os.path.exists(partial_path):
try:
old_df =
pd.read_csv(partial_path)
final_df = pd.concat([old_df, final_df], ignore_index=True)
final_df = final_df.drop_duplicates(subset=["subject"], keep="last")
except Exception:
pass
atomic_csv_save(final_df, csv_path)
print(f"\n Saved final: {csv_path}")
df = final_df
print("\n[3/4] Statistical Analysis...")
nan_report = df.groupby("group")["critical_width"].apply(lambda x: x.isna().sum())
print(f"\nNaN count in critical_width: {dict(nan_report)}")
print("\n=== Descriptive ===")
print(df.groupby("group")[["ipr", "critical_width", "peak_sigma", "chi_peak", "auc"]].mean().round(4))
print("\n--- Spearman: IPR ↔ Critical Width ---")
x, y = df["ipr"].values, df["critical_width"].values
mean_r, lo, hi = bootstrap_spearman(x, y, n_boot=params["bootstrap"], seed=args.seed)
print(f"Mean ρ = {mean_r:.4f} 95% CI: [{lo:.4f}, {hi:.4f}]")
print("\n--- Partial Spearman (control: lambda_max) ---")
pr, pp = partial_spearman(df, "ipr", "critical_width", ["lambda_max"])
print(f"Partial ρ = {pr:.4f}, p = {pp:.4f}")
print("\n--- Kruskal-Wallis ---")
gdata = [df[
df.group == g]["critical_width"].dropna().values for g in ["HC", "TLE", "GTCS"]]
if all(len(g) > 5 for g in gdata):
H, pkw = kruskal(*gdata)
print(f"H = {H:.3f}, p = {pkw:.5f}")
print("\n--- Post-hoc MWU FDR Effect Size ---")
pairs = list(combinations(["HC", "TLE", "GTCS"], 2))
pvals, mwu_res = [], []
for g1, g2 in pairs:
cw1 = df[
df.group == g1]["critical_width"].dropna().values
cw2 = df[
df.group == g2]["critical_width"].dropna().values
if len(cw1) < 5 or len(cw2) < 5:
continue
u, p = mannwhitneyu(cw1, cw2, alternative="two-sided")
r, direction = rank_biserial(len(cw1), len(cw2), u, g1, g2, np.nanmean(cw1), np.nanmean(cw2))
pvals.append(p)
mwu_res.append((g1, g2, p, r, direction))
fdr = benjamini_hochberg_fdr(pvals)
for i, (g1, g2, p, r, direction) in enumerate(mwu_res):
print(f"{g1} vs {g2}: p={p:.4f} (FDR q={fdr[i]:.4f}), r={r:.3f} ({direction})")