Source code for telemetry_anomdet.ingest.dataset
# src/telemetry_anomdet/ingest/dataset.py
"""
Telemetry dataset loader.
This module provides a lightweight class called TelemetryDataset that wraps telemetry data in a consistent format for downstream processing.
"""
from __future__ import annotations
from dataclasses import dataclass
import pandas as pd
try:
from typing import Self
except ImportError:
from typing_extensions import Self
[docs]
@dataclass
class TelemetryDataset:
    """
    Container for telemetry data.

    Wraps a single pandas DataFrame and offers copy-returning accessors
    (``to_pandas``, ``head``) alongside direct access (``data``).

    Two instances compare equal when they are of the same class and their
    DataFrames have equal contents. Instances are not hashable.
    """

    # Pandas DataFrame containing telemetry rows.
    _df: pd.DataFrame

    @classmethod
    def synthetic(cls) -> Self:
        """
        Generate a small synthetic telemetry dataset for development / testing.

        Parameters:
            N/A for now.

        Returns:
            TelemetryDataset holding two rows with the columns "timestamp"
            (UTC datetimes), "variable" and "value", sorted by timestamp and
            then variable.
        """
        df = pd.DataFrame(
            {
                "timestamp": pd.to_datetime(
                    ["2025-01-01T00:00:00Z", "2025-01-01T00:01:00Z"], utc=True
                ),
                "variable": ["temp", "pressure"],
                "value": [25.5, 1013.25],
            }
        ).sort_values(["timestamp", "variable"], ignore_index=True)
        return cls(df)

    def to_pandas(self) -> pd.DataFrame:
        """
        Return a copy of the DataFrame to prevent accidental mutation.
        """
        return self._df.copy()

    @property
    def data(self) -> pd.DataFrame:
        """
        Direct access to the underlying DataFrame.

        No copy is made: the returned object is the one stored on the
        dataset, so in-place changes to it are visible through the dataset.
        """
        return self._df

    def head(self, n: int = 5) -> pd.DataFrame:
        """
        Head of the underlying DataFrame (copy).

        Parameters:
            n: Number of rows passed to ``DataFrame.head``. Defaults to 5.

        Returns:
            A copy of the selected rows.
        """
        return self._df.head(n).copy()

    def __eq__(self, other: object) -> bool:
        """
        Compare two datasets by the contents of their DataFrames.

        Returns ``NotImplemented`` when ``other`` is not of the same class,
        which lets Python fall back to its default comparison handling.
        """
        # The dataclass-generated __eq__ compares field tuples, which ends up
        # taking the truth value of ``df == df`` and raises ValueError for two
        # distinct DataFrame objects. DataFrame.equals returns a plain bool.
        if other.__class__ is not self.__class__:
            return NotImplemented
        return bool(self._df.equals(other._df))

    def __len__(self) -> int:
        """Number of rows in the underlying DataFrame."""
        return len(self._df)

    def __repr__(self) -> str:
        """Short summary showing the row count and the column names."""
        cols = list(self._df.columns)
        return f"TelemetryDataset(n={len(self)}, cols={cols})"