61 lines
1.9 KiB
Python
61 lines
1.9 KiB
Python
from datetime import timedelta
|
|
import pandas as pd
|
|
|
|
OUTLIER_FACTOR = 5
|
|
|
|
|
|
def detect_groups(df: pd.DataFrame, outlier_factor: float = OUTLIER_FACTOR,
|
|
manual_splits: list | None = None,
|
|
forced_splits: list | None = None) -> list:
|
|
"""Split shots into groups.
|
|
|
|
forced_splits: when provided, ONLY these split positions are used — auto-detection
|
|
is bypassed entirely. Use this for user-defined groupings from the visual editor.
|
|
|
|
manual_splits: added on top of auto-detected splits (when forced_splits is None).
|
|
Both auto+manual mechanisms are merged and deduplicated.
|
|
"""
|
|
if len(df) <= 1:
|
|
return [df]
|
|
|
|
def _build_groups(all_splits):
|
|
if not all_splits:
|
|
return [df]
|
|
groups = []
|
|
prev = 0
|
|
for pos in all_splits:
|
|
group = df.iloc[prev:pos]
|
|
if len(group) > 0:
|
|
groups.append(group.reset_index(drop=True))
|
|
prev = pos
|
|
last = df.iloc[prev:]
|
|
if len(last) > 0:
|
|
groups.append(last.reset_index(drop=True))
|
|
return groups
|
|
|
|
# Forced mode: user controls exact split positions, no auto-detection
|
|
if forced_splits is not None:
|
|
valid = sorted(s for s in forced_splits if 0 < s < len(df))
|
|
return _build_groups(valid)
|
|
|
|
times = df["time"]
|
|
diffs = times.diff().dropna()
|
|
|
|
if diffs.empty:
|
|
return [df]
|
|
|
|
median_gap = diffs.median()
|
|
|
|
# Auto-detect splits based on time gaps
|
|
auto_splits: set[int] = set()
|
|
if median_gap != timedelta(0):
|
|
threshold = outlier_factor * median_gap
|
|
for idx, gap in diffs.items():
|
|
if gap > threshold:
|
|
pos = df.index.get_loc(idx)
|
|
auto_splits.add(pos)
|
|
|
|
# Merge with manual splits (filter to valid range)
|
|
extra = set(manual_splits) if manual_splits else set()
|
|
return _build_groups(sorted(auto_splits | extra))
|