# Source code for beta_rec.utils.download

import os

import requests
from tqdm import tqdm

from ..utils.onedrive import OneDrive


def download_file(url, store_file_path):
    """Download the raw dataset file.

    Download the dataset from the given url and save it to store_file_path.
    A url containing "1drv.ms" is fetched through the OneDrive helper into
    the directory part of store_file_path; any other url is streamed over
    HTTP with a progress bar. Failures detected here (an HTTP error status
    or a size mismatch) are reported by printing a message, not by raising.

    Args:
        url: the url from which the dataset file can be downloaded.
        store_file_path: the path where the downloaded file is stored.
    Return:
        None.
    """
    filename = url.split("/")[-1]
    print(f"Start downloading file {filename} with url {url}...")

    if "1drv.ms" in url:
        # allow downloading raw data from Onedrive; the helper is handed the
        # parent directory of the target path rather than the path itself.
        folder = OneDrive(url=url, path=os.path.dirname(store_file_path))
        folder.download()
        return

    # The context manager releases the streamed connection even when the
    # download is aborted or writing the file raises.
    with requests.get(url, allow_redirects=True, stream=True) as r:
        if not r.ok:
            # Never save an HTTP error page as if it were the dataset.
            print("ERROR, download fail")
            return

        # Total size in bytes; 0 when the server sends no content-length.
        total_size = int(r.headers.get("content-length", 0))
        block_size = 1024

        with open(store_file_path, "wb") as f, tqdm(
            total=total_size, unit="iB", unit_scale=True
        ) as t:
            for data in r.iter_content(block_size):
                t.update(len(data))
                f.write(data)

    # t.n is the number of bytes counted by the progress bar. The check is
    # skipped when the expected size is unknown (total_size == 0).
    if total_size != 0 and t.n != total_size:
        print("ERROR, download fail")
    else:
        print(f"Success loading file {filename} to {store_file_path}")


def get_format(suffix):
    """Get the archive format.

    Map the suffix of an archive file to its archive format name. Suffixes
    without an entry in the mapping are returned unchanged.

    Args:
        suffix: suffix of the archive file, e.g. "gz" or "bz2".
    Return:
        the archive format of the suffix ("bztar" for "bz2", "gztar" for
        "gz"), or the suffix itself when it has no mapping.
    """
    format_map = {
        "bz2": "bztar",
        "gz": "gztar",
    }
    # Fall back to the suffix itself for anything not listed above.
    return format_map.get(suffix, suffix)


def download_file_from_onedrive(url, path):
    """Download processed file from OneDrive.

    Download a file from OneDrive with the given url and save it to the
    given path, by building a OneDrive helper and calling its download().

    Args:
        url: the shared link generated by OneDrive.
        path: the path supposed to store the file.
    """
    folder = OneDrive(url=url, path=path)
    folder.download()