leaf_engine
===========

.. py:module:: leaf_engine


Subpackages
-----------

.. toctree::
   :maxdepth: 1

   /autoapi/leaf_engine/adapt/index
   /autoapi/leaf_engine/domain/index
   /autoapi/leaf_engine/etl/index
   /autoapi/leaf_engine/io/index
   /autoapi/leaf_engine/utils/index


Submodules
----------

.. toctree::
   :maxdepth: 1

   /autoapi/leaf_engine/logging/index
   /autoapi/leaf_engine/params/index


Functions
---------

.. autoapisummary::

   leaf_engine.cluster_pipeline
   leaf_engine.concat_pipeline
   leaf_engine.filter_pipeline
   leaf_engine.flag_pipeline
   leaf_engine.fsc_pipeline
   leaf_engine.fuel_pipeline
   leaf_engine.geocode_pipeline
   leaf_engine.load_pipeline
   leaf_engine.map_pipeline
   leaf_engine.output_pipeline
   leaf_engine.pcmiler_pipeline
   leaf_engine.pull_lighthouse_data
   leaf_engine.read_csv
   leaf_engine.read_data
   leaf_engine.read_dataset
   leaf_engine.read_drive
   leaf_engine.read_params
   leaf_engine.resolve_geocoding
   leaf_engine.resolve_miles
   leaf_engine.setup
   leaf_engine.to_drive
   leaf_engine.upload_to_drive
   leaf_engine.uuid_pipeline
   leaf_engine.validate_pipeline


Package Contents
----------------

.. py:function:: cluster_pipeline(df: pandas.DataFrame) -> pandas.DataFrame

.. py:function:: concat_pipeline(input_dfs: List[pandas.DataFrame], lane_level=False) -> pandas.DataFrame

.. py:function:: filter_pipeline(df)

.. py:function:: flag_pipeline(df: pandas.DataFrame) -> pandas.DataFrame

   Public flagging pipeline. Pipe shipments through this function to
   augment them with flagging columns.

   :param df: Shipments DataFrame.
   :type df: pd.DataFrame

   :returns: Input DataFrame with additional flagging columns.
   :rtype: pd.DataFrame

.. py:function:: fsc_pipeline(df: pandas.DataFrame, lane_level: bool = False) -> pandas.DataFrame

.. py:function:: fuel_pipeline(df)

.. py:function:: geocode_pipeline(df: pandas.DataFrame) -> pandas.DataFrame

.. py:function:: load_pipeline(df: pandas.DataFrame, run_type: str, dry_run: bool = False, overwrite: bool = False) -> pandas.DataFrame

.. py:function:: map_pipeline(df, dataset_params, lane_level=False)

.. py:function:: output_pipeline(df, lane_level=False)

.. py:function:: pcmiler_pipeline(df)

.. py:function:: pull_lighthouse_data(params) -> List[pandas.DataFrame]

   Pulls lighthouse shipper data for an internal run. Filters down to
   only the latest batch.

   :param params: Adapt params.
   :type params: dict

   :returns: DataFrames containing lighthouse lanes.
   :rtype: List[pd.DataFrame]

.. py:function:: read_csv(input_path, **kwargs) -> pandas.DataFrame

.. py:function:: read_data(company_params: dict) -> Dict[str, pandas.DataFrame]

   Reads all data specified in company_params.

   :returns: Dictionary mapping dataset labels to DataFrames that can be
             passed to `pd.concat` directly to create one DataFrame.
   :rtype: Dict[str, pd.DataFrame]

.. py:function:: read_dataset(dataset_params) -> pandas.DataFrame | Dict[Any, pandas.DataFrame]

.. py:function:: read_drive(url: Optional[str] = None, path: Optional[str] = None, cache: bool = True, **kwargs) -> pandas.DataFrame

   Reads a CSV, Excel, or Google Spreadsheet file from Google Drive,
   either by URL or by path.

   :param url: File URL. Can be copied from the browser navigation bar.
               Defaults to None.
   :type url: Optional[str], optional
   :param path: File path. The first part needs to be the drive name.
                Defaults to None.
   :type path: Optional[str], optional
   :param cache: Whether to cache the result to disk. Defaults to True.
                 This makes subsequent calls to read_drive faster.
   :type cache: bool, optional
   :param \*\*kwargs: Keyword arguments passed to pandas.read_csv or
                      pandas.read_excel.

   Examples:

   >>> df = read_drive(url="https://drive.google.com/file/d/XXYYZZ/view?usp=sharing")
   >>> df = read_drive(path="Data Science/folder1/folder2/file.csv")

   :raises LeafGoogleDriveException: If both url and path are None, or
       both are not None.
   :raises LeafGoogleDriveException: If the file is not found on Google Drive.
   :raises LeafGoogleDriveException: If unable to download the file from Google Drive.
   :raises ValueError: If neither pd.read_csv nor pd.read_excel can read the file.

   :returns: DataFrame read from the file.
   :rtype: pd.DataFrame

.. py:function:: read_params(params_path: Union[str, pathlib.Path]) -> dict

.. py:function:: resolve_geocoding(df: pandas.DataFrame) -> pandas.DataFrame

.. py:function:: resolve_miles(df: pandas.DataFrame) -> pandas.DataFrame

.. py:function:: setup(input_params: Union[dict, str, pathlib.Path], log_file_name: Optional[str] = None, enable_log_git: bool = True, enable_log_params: bool = True) -> None

.. py:function:: to_drive(df: pandas.DataFrame, path: str, overwrite: bool = False, **kwargs) -> str

   Write a DataFrame to a file on Google Drive, at the specified path.

   :param df: DataFrame to write.
   :type df: pd.DataFrame
   :param path: Path to write to. The first part of the path is the drive name.
   :type path: str
   :param overwrite: Overwrite an existing file. Defaults to False.
   :type overwrite: bool, optional

   Examples:

   >>> df.pipe(gdrive.to_drive, "drive_name/folder1/folder2/file_name.csv")
   >>> df.pipe(gdrive.to_drive, "drive_name/folder1/folder2/file_name.xlsx")

   :raises LeafGoogleDriveException: Raised if `overwrite` is False and the file exists.
   :raises LeafGoogleDriveException: Raised if the path is invalid.
   :raises LeafGoogleDriveException: Raised if the write response does not contain a file ID.

   :returns: Google Drive file URL.
   :rtype: str

.. py:function:: upload_to_drive(local_path: Union[str, pathlib.Path], drive_path: str, overwrite: bool = False) -> str

   Uploads a file to Google Drive.

   :param local_path: Local path of the file to upload.
   :type local_path: Union[str, Path]
   :param drive_path: Google Drive path to upload to. The first part of the
       path is the drive name. Must include the file name:
       "drive_name/folder1/folder2/file_name.zip".
   :type drive_path: str
   :param overwrite: Overwrite an existing file. Defaults to False.
   :type overwrite: bool

   :raises LeafGoogleDriveException: Raised if `overwrite` is False and the file exists.
   :raises LeafGoogleDriveException: Raised if unable to upload the file.

   :returns: Google Drive URL of the uploaded file.
   :rtype: str

.. py:function:: uuid_pipeline(df, set_shipment_uuid=True, set_lane_uuid=False)

.. py:function:: validate_pipeline(df, params=None)
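The pipeline functions above share a DataFrame-in, DataFrame-out shape (see `flag_pipeline`, which is documented as returning its input augmented with flagging columns), so they compose naturally with `pandas.DataFrame.pipe`, as the `to_drive` examples already hint. A minimal sketch of that composition pattern, using a hypothetical toy function in place of the real `flag_pipeline` (which is not reproduced here, and whose actual flag columns are not documented in this index):

```python
import pandas as pd


def toy_flag_pipeline(df: pd.DataFrame) -> pd.DataFrame:
    # Stand-in for a leaf_engine pipeline step: returns the input
    # DataFrame with an additional flag column, leaving the original
    # columns untouched. The "is_long_haul" flag is illustrative only.
    out = df.copy()
    out["is_long_haul"] = out["miles"] > 500
    return out


# Toy shipments DataFrame standing in for the real shipments data.
shipments = pd.DataFrame({"lane": ["A-B", "C-D"], "miles": [120, 780]})

# DataFrame-in/DataFrame-out steps chain cleanly with .pipe:
flagged = shipments.pipe(toy_flag_pipeline)
print(flagged)
```

Because each step returns a DataFrame, several steps can be chained in one expression (e.g. `df.pipe(step_a).pipe(step_b)`), which is presumably how these pipeline functions are intended to be combined.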