Source code for berlin_gelaendemodelle_downloader.download

import io
import os
import re
import zipfile
import requests

from tqdm import tqdm
from bs4 import BeautifulSoup
from click import BadParameter

from .file import save_files
from .constant import DATA_URL, GEOJSON_FILE_FORMAT
from .utils import file_content_2_data_frame, create_directories, compress_data_frame, data_frame_2_geo_data_frame, download_2_file_content






def download_zip(zip_url: str) -> tuple:
    """Download a zip subset and return the name and content of its first file.

    Args:
        zip_url (str): URL to a zip subset of the data

    Returns:
        tuple: ``(file_name, file_content)`` where ``file_name`` is the
        (possibly normalized) name of the first archive member and
        ``file_content`` is the UTF-8 decoded member passed through
        ``download_2_file_content`` (presumably a list, per the original
        documentation -- confirm against that helper).

    Raises:
        HTTPError: Will be raised if the request for ``zip_url`` fails or
            responds with a status code other than 200.
    """
    try:
        response = requests.get(zip_url)
    except requests.RequestException as err:
        # Keep the documented HTTPError contract, but preserve the root cause.
        raise requests.HTTPError(f"Could not get URL: {zip_url}") from err

    if response.status_code != 200:
        raise requests.HTTPError(f"Could not get URL: {zip_url}")

    # The archive lives entirely in memory; the context manager makes sure the
    # ZipFile handle is released as soon as the member has been read.
    with zipfile.ZipFile(io.BytesIO(response.content)) as zipped_sub_set:
        # Zip archives are expected to contain a single file, so only the
        # first member is read.
        file_name = zipped_sub_set.namelist()[0]
        download_content = zipped_sub_set.read(file_name).decode("utf8")

    # Fix inconsistent file names if necessary: rebuild the name from the
    # first three and the last four characters of the extension-less name.
    if not re.match(r".*\d{3}_\d{4}\.txt", file_name):
        stem = os.path.splitext(file_name)[0]
        file_name = f"{stem[:3]}_{stem[-4:]}.txt"

    return file_name, download_2_file_content(download_content)


def download_data(download_path: str, keep_original: bool, compress: int, file_format: tuple):
    """Download and optionally compress the ground level data of Berlin.

    Args:
        download_path (str): path to download directory
        keep_original (bool): indicates to keep the original txt files or not;
            forced to ``True`` when no compression is requested
        compress (int): size of square pixels for compression; values ``<= 0``
            disable compression, positive values must divide 2000 without
            remainder
        file_format (tuple): file formats to save the data

    Raises:
        click.BadParameter: if ``compress`` is positive but does not divide
            2000 without remainder
    """
    compression_enabled = compress > 0

    # Validate before touching the file system or the network, so an invalid
    # argument does not leave freshly created directories behind.
    if compression_enabled and 2000 % compress != 0:
        raise BadParameter("Argument 'compress' have to divide 2000 without remainder.")

    # Without compression the originals are the only output, so always keep them.
    if not compression_enabled:
        keep_original = True

    # Creates all necessary directories.
    create_directories(download_path, keep_original, compress, file_format)

    links = get_subset_links()

    for link in tqdm(links):
        file_name, file_content = download_zip(link)

        # Stays None when no compression is requested.
        compressed_data_frame = None
        if compression_enabled:
            data_frame = file_content_2_data_frame(file_content)
            compressed_data_frame = compress_data_frame(data_frame, compress)

            if GEOJSON_FILE_FORMAT in file_format:
                compressed_data_frame = data_frame_2_geo_data_frame(compressed_data_frame)

        # Originals are only written when ``keep_original`` is set.
        save_files(download_path, file_name, file_content, compressed_data_frame, file_format, keep_original)