Source code for lours.dataset.io.parquet

from pathlib import Path

from lours.dataset import Dataset
from lours.utils import parquet_saver


def from_parquet(input_path: Path | str) -> Dataset:
    """Rebuild a Dataset from a folder previously saved as parquet files.

    The folder is parsed by ``parquet_saver.dict_from_parquet``; the dataframes
    come from the parquet files and the remaining attributes from the
    ``dataset.yaml`` file stored alongside them.

    Args:
        input_path: Folder containing the yaml file and the parquet files.

    Raises:
        ValueError: If the ``__name__`` entry of the loaded archive is not
            ``"Dataset"``.

    Returns:
        The loaded dataset, with ``booleanized_columns`` restored when the
        archive provides it.
    """
    archive = parquet_saver.dict_from_parquet(Path(input_path))

    # Guard clause: refuse archives that were saved from another object type.
    object_name = archive["__name__"]
    if object_name != "Dataset":
        raise ValueError(
            f"Wrong object type for parquet archive. Expected Dataset, got {object_name}"
        )

    # Mandatory constructor fields, looked up in the order they are passed.
    constructor_fields = ("images_root", "images", "annotations", "label_map")
    loaded = Dataset(**{field: archive[field] for field in constructor_fields})

    # ``booleanized_columns`` is optional in the archive; only restore it when
    # present so the attribute is otherwise left as the constructor set it.
    if "booleanized_columns" in archive:
        loaded.booleanized_columns = archive["booleanized_columns"]

    return loaded