From d560c98b5ff0b13f3804042575a96e23fdb6b351 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Manuel=20Barroso=20Galindo?= Date: Wed, 12 Apr 2023 00:34:17 +0200 Subject: [PATCH] Finishing ExternalFilesReader --- .github/db_operator.py | 59 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/.github/db_operator.py b/.github/db_operator.py index 68cd761cf..1417a728e 100755 --- a/.github/db_operator.py +++ b/.github/db_operator.py @@ -14,6 +14,8 @@ import json import hashlib import shlex import tempfile +import csv +from urllib.parse import urlparse from argparse import ArgumentParser from zipfile import ZipFile, ZIP_DEFLATED from dataclasses import dataclass @@ -149,7 +151,62 @@ class ExternalFilesReader: self._strpath = strpath def read_external_files(self) -> List[Tuple[Path, Dict[str, Any]]: - return [] + if self._strpath == '': + return [] + + data = self._read_csv_data() + if data is None: + return [] + + print('Parsing ' + self._strpath + ' to extract external files.) + result = [] + for row in data: + if len(row) < 4: + print('Not enough columns in this row, skipping it.', row) + continue + path, url, size, md5hash = row[0].strip(), row[1].strip(), row[2].strip(), row[3].strip() + if not is_valid_path(path): + print(f"Invalid path in this row: {path}, skipping it.", row) + continue + if not is_valid_url(url): + print(f"Invalid URL in this row: {url}, skipping it.", row) + continue + if not is_valid_size(size): + print(f"Invalid size in this row: {size}, skipping it.", row) + continue + if not is_valid_md5hash(md5hash): + print(f"Invalid MD5 hash in this row: {md5hash}, skipping it.", row) + continue + + result.append((path, {"url": url, "size": int(size), "hash": md5hash})) + + return result + + def _read_csv_data(self) -> Optional[List[List[str]]]: + try: + with open(self._strpath, newline='') as csvfile: + csv_reader = csv.reader(csvfile, delimiter=',', quotechar='"') + return [row for row in csv_reader][1:] + except Exception as e: + print('csv file not opened: ' + self._strpath) + print(e) + return None + +def is_valid_url(url: str) -> bool: + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except ValueError: + return False + +def is_valid_path(path: str) -> bool: + return bool(re.match(r'^[\w\-/]{3,}$', path)) + +def is_valid_size(size: str) -> bool: + return size.isdigit() and int(size) > 0 + +def is_valid_md5hash(md5hash: str) -> bool: + return bool(re.match(r'^[a-fA-F0-9]{32}$', md5hash)) initial_filter_aliases = [ # Consoles