Source code for scraper_toolkit.components.Exporter

import csv
from pathlib import Path, PurePath
from typing import List, Union, TYPE_CHECKING

if TYPE_CHECKING:
    from scraper_toolkit.components.Parser import Parser


class Exporter:
    """Export data from parsers.

    :param data: The data to export as a Parser object, a dictionary, or a
        list of dictionaries. A single dictionary is treated as one row.
    :raises TypeError: If ``data`` is none of the supported input types.
    """

    def __init__(self, data: Union['Parser', dict, List[dict]]):
        self.data = self.__get_data_as_list_of_dicts(data)

    def export_to_csv(self, csv_path: Union[Path, PurePath, str],
                      encoding: str = 'UTF-8', write_header: bool = True):
        """Export parsed data to a CSV file.

        :param csv_path: Path of the location to save the CSV file.
        :param encoding: CSV file encoding. Default is UTF-8.
        :param write_header: If true, write a header row to the CSV file
            using the keys of the provided data. No header is written when
            there are no keys at all (e.g. the data is empty).
        """
        fieldnames = self.__get_fieldnames()
        # newline='' lets the csv module control line endings itself.
        with open(csv_path, 'w', encoding=encoding, newline='') as fo:
            writer = csv.DictWriter(fo, fieldnames=fieldnames)
            if write_header and fieldnames:
                writer.writeheader()
            writer.writerows(self.data)

    @staticmethod
    def __get_data_as_list_of_dicts(data: Union['Parser', dict, List[dict]]):
        """Normalize the supported input types to a list of row dictionaries.

        :param data: A Parser object, a dictionary, or a list of dictionaries.
        :return: The rows to export, one dictionary per row.
        :raises TypeError: If ``data`` is not one of the supported types.
        """
        # Parser is matched by type name rather than isinstance() so that
        # this check does not need the Parser class at runtime.
        if 'Parser.Parser' in str(type(data)):
            return data.parsed
        if isinstance(data, dict):
            # A lone dictionary is a single row; list(data) would only
            # produce a list of its keys.
            return [data]
        if isinstance(data, list) and all(isinstance(x, dict) for x in data):
            return data
        msg = 'Invalid data input type'
        raise TypeError(msg)

    def __get_fieldnames(self) -> List[str]:
        """Collect the column names used by the rows in ``self.data``.

        Keys are gathered from every row in first-seen order, so rows that
        carry extra keys do not make the CSV writer fail, and empty data
        yields an empty list instead of raising ``IndexError``.

        :return: Ordered list of unique keys found across all rows.
        """
        fieldnames: List[str] = []
        seen = set()
        for row in self.data:
            for key in row:
                if key not in seen:
                    seen.add(key)
                    fieldnames.append(key)
        return fieldnames