from datetime import timedelta import pandas as pd OUTLIER_FACTOR = 5 def detect_groups(df: pd.DataFrame, outlier_factor: float = OUTLIER_FACTOR, manual_splits: list | None = None, forced_splits: list | None = None) -> list: """Split shots into groups. forced_splits: when provided, ONLY these split positions are used — auto-detection is bypassed entirely. Use this for user-defined groupings from the visual editor. manual_splits: added on top of auto-detected splits (when forced_splits is None). Both auto+manual mechanisms are merged and deduplicated. """ if len(df) <= 1: return [df] def _build_groups(all_splits): if not all_splits: return [df] groups = [] prev = 0 for pos in all_splits: group = df.iloc[prev:pos] if len(group) > 0: groups.append(group.reset_index(drop=True)) prev = pos last = df.iloc[prev:] if len(last) > 0: groups.append(last.reset_index(drop=True)) return groups # Forced mode: user controls exact split positions, no auto-detection if forced_splits is not None: valid = sorted(s for s in forced_splits if 0 < s < len(df)) return _build_groups(valid) times = df["time"] diffs = times.diff().dropna() if diffs.empty: return [df] median_gap = diffs.median() # Auto-detect splits based on time gaps auto_splits: set[int] = set() if median_gap != timedelta(0): threshold = outlier_factor * median_gap for idx, gap in diffs.items(): if gap > threshold: pos = df.index.get_loc(idx) auto_splits.add(pos) # Merge with manual splits (filter to valid range) extra = set(manual_splits) if manual_splits else set() return _build_groups(sorted(auto_splits | extra))