Source code for scraper_toolkit.components.Exporter

import csv
from pathlib import Path, PurePath
from typing import List, Union, TYPE_CHECKING

if TYPE_CHECKING:
    from scraper_toolkit.components.Parser import Parser


class Exporter:
    """Export data from parsers.

    The input is normalised once, at construction time, into a list of
    dictionaries (one dictionary per output row) stored on ``self.data``.

    :param data: The data to export as a Parser object, a dictionary, or a list of dictionaries.
    :raises TypeError: If ``data`` is none of the accepted input types.
    """
    def __init__(self, data: Union['Parser', dict, List[dict]]):
        self.data = self.__get_data_as_list_of_dicts(data)

    def export_to_csv(self, csv_path: Union[Path, PurePath, str], encoding: str = 'UTF-8',
                      write_header: bool = True) -> None:
        """Export parsed data to a CSV file.

        Columns are the keys found in the data rows, in order of first appearance.
        A row that lacks one of the columns gets an empty cell for it.

        :param csv_path: Path of the location to save the CSV file.
        :param encoding: CSV file encoding. Default is UTF-8.
        :param write_header: If true, write a header row to the CSV file using the keys of the provided data.
        """
        fieldnames = self.__get_fieldnames()

        # newline='' lets the csv module control line endings itself.
        with open(csv_path, 'w', encoding=encoding, newline='') as fo:
            writer = csv.DictWriter(fo, fieldnames=fieldnames)

            # With no columns a header would only be a stray blank line, so skip it.
            if write_header and fieldnames:
                writer.writeheader()

            writer.writerows(self.data)

    @staticmethod
    def __get_data_as_list_of_dicts(data: Union['Parser', dict, List[dict]]) -> List[dict]:
        """Normalise the supported input types to a list of row dictionaries.

        :param data: A Parser object, a single dictionary, or a list of dictionaries.
        :return: The rows to export, one dictionary per row.
        :raises TypeError: If ``data`` is not one of the supported types.
        """
        # Parser is only imported under TYPE_CHECKING, so it is recognised by its
        # type name here rather than with isinstance().
        if 'Parser.Parser' in str(type(data)):
            return data.parsed
        if isinstance(data, dict):
            # A single dictionary is a single row; list(data) would yield only its keys.
            return [data]
        if isinstance(data, list) and all(isinstance(row, dict) for row in data):
            return data

        msg = 'Invalid data input type'
        raise TypeError(msg)

    def __get_fieldnames(self) -> List[str]:
        """Return the column names for the export.

        Keys are gathered from every row (not just the first) in order of first
        appearance, so rows with differing keys can still be written. Empty data
        yields an empty list.

        :return: The list of unique keys across all rows.
        """
        return list(dict.fromkeys(key for row in self.data for key in row))