import csv
from pathlib import Path, PurePath
from typing import List, Union, TYPE_CHECKING
if TYPE_CHECKING:
from scraper_toolkit.components.Parser import Parser
class Exporter:
    """Export data from parsers.

    The input is normalised once, at construction time, into a list of
    dictionaries (one dictionary per output row) and stored in ``self.data``.

    :param data: The data to export as a Parser object, a dictionary, or a list of dictionaries.
    :raises TypeError: If ``data`` is none of the supported input types.
    """

    def __init__(self, data: Union['Parser', dict, List[dict]]):
        self.data = self.__get_data_as_list_of_dicts(data)

    def export_to_csv(self, csv_path: Union[Path, PurePath, str], encoding: str = 'UTF-8',
                      write_header: bool = True) -> None:
        """Export parsed data to a CSV file.

        Columns are the keys found in the data, in first-seen order. A row that
        lacks one of the columns gets an empty cell for it. When there is no
        data at all, an empty file is written.

        :param csv_path: Path of the location to save the CSV file.
        :param encoding: CSV file encoding. Default is UTF-8.
        :param write_header: If true, write a header row to the CSV file using the "name" keys in the provided data.
        """
        fieldnames = self.__get_fieldnames()
        # newline='' lets the csv module control line endings itself.
        with open(csv_path, 'w', encoding=encoding, newline='') as fo:
            writer = csv.DictWriter(fo, fieldnames=fieldnames)
            # Skip the header when there are no columns, so that exporting
            # empty data yields an empty file rather than a lone blank line.
            if write_header and fieldnames:
                writer.writeheader()
            writer.writerows(self.data)

    @staticmethod
    def __get_data_as_list_of_dicts(data: Union['Parser', dict, List[dict]]) -> List[dict]:
        """Normalise the supported input types into a list of dictionaries.

        :param data: A Parser object, a single dictionary, or a list of dictionaries.
        :return: A list of dictionaries, one per row.
        :raises TypeError: If ``data`` is not one of the supported types.
        """
        # Parser is recognised by its type name instead of isinstance(), since
        # the class is only imported under TYPE_CHECKING and is not available
        # at runtime. NOTE(review): ``parsed`` is assumed to already be a list
        # of dictionaries -- confirm against the Parser class.
        if 'Parser.Parser' in str(type(data)):
            return data.parsed
        if isinstance(data, dict):
            # A single dictionary is a single row. (list(data) would return
            # the dictionary's keys, not the row.)
            return [data]
        if isinstance(data, list) and all(isinstance(x, dict) for x in data):
            return data
        msg = 'Invalid data input type'
        raise TypeError(msg)

    def __get_fieldnames(self) -> List[str]:
        """Return every key used by any row, in first-seen order.

        :return: The column names; an empty list when there is no data.
        """
        # dict.fromkeys de-duplicates while preserving insertion order.
        return list(dict.fromkeys(key for row in self.data for key in row))