import os
import requests
from tqdm import tqdm
from ..utils.onedrive import OneDrive
def download_file(url, store_file_path):
    """Download the raw dataset file.

    Download the dataset from the given url and save it to store_file_path.
    Progress and the final outcome are reported with ``print``; failures of
    the plain HTTP download are reported the same way rather than raised.

    Args:
        url: the url from which the dataset file can be downloaded. A url
            containing ``1drv.ms`` is handed to the ``OneDrive`` helper.
        store_file_path: the path that stores the downloaded file. For a
            OneDrive url, only the directory part of this path is passed on
            to the helper.

    Returns:
        None.
    """
    filename = url.split("/")[-1]
    print(f"Start downloading file {filename} with url {url}...")

    if "1drv.ms" in url:
        # Allow downloading raw data from OneDrive: the helper receives the
        # parent directory of the target path, not the file path itself.
        store_file_path = os.path.dirname(store_file_path)
        folder = OneDrive(url=url, path=store_file_path)
        folder.download()
        return

    block_size = 1024
    # The response is used as a context manager so the streamed connection is
    # released even when writing the file fails part-way through.
    with requests.get(url, allow_redirects=True, stream=True) as r:
        if not r.ok:
            # Do not store the body of an HTTP error response (e.g. a 404
            # page) as if it were the dataset.
            print(f"ERROR, download fail (HTTP status {r.status_code})")
            return

        # Total size in bytes; 0 when the server sends no content-length.
        total_size = int(r.headers.get("content-length", 0))
        with tqdm(total=total_size, unit="iB", unit_scale=True) as t, open(
            store_file_path, "wb"
        ) as f:
            for data in r.iter_content(block_size):
                t.update(len(data))
                f.write(data)

    # NOTE(review): iter_content yields decoded bytes, so a compressed
    # transfer may legitimately differ from content-length -- confirm whether
    # any dataset host uses Content-Encoding.
    if total_size != 0 and t.n != total_size:
        print("ERROR, download fail")
    else:
        print(f"Success loading file {filename} to {store_file_path}")
def download_file_from_onedrive(url, path):
    """Download a processed file from OneDrive.

    Build a ``OneDrive`` helper for the shared link and run its download,
    storing the result under the given path.

    Args:
        url: the shared link generated by OneDrive.
        path: the path where the file is supposed to be stored.
    """
    OneDrive(url=url, path=path).download()