From c16074413549b22ee3431ce60c33f5b56c2f3424 Mon Sep 17 00:00:00 2001 From: Janik Besendorf <janik@besendorf.org> Date: Mon, 8 Feb 2021 13:25:26 +0100 Subject: [PATCH] fixes ConnectionResetError in cc-scraper and adds error handling --- common_criteria_scraper/cc_portal_scraper.py | 28 ++++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/common_criteria_scraper/cc_portal_scraper.py b/common_criteria_scraper/cc_portal_scraper.py index 3cd2000..c9f36f7 100644 --- a/common_criteria_scraper/cc_portal_scraper.py +++ b/common_criteria_scraper/cc_portal_scraper.py @@ -32,14 +32,26 @@ with open ('filtered.csv', 'r') as filtered: sys.exit(1) csv_dict = csv.DictReader(filtered) for row in csv_dict: - if row['Certification Report URL'] != '': - urllib.request.urlretrieve(row['Certification Report URL'].replace(' ','%20'),(row['Certification Report URL'].split('/')[-1])) - if row['Security Target URL'] != '': - urllib.request.urlretrieve(row['Security Target URL'].replace(' ','%20'),(row['Security Target URL'].split('/')[-1])) - if row['Maintenance Report'] != '': - urllib.request.urlretrieve(row['Maintenance Report'].replace(' ','%20'),(row['Maintenance Report'].split('/')[-1])) - if row['Maintenance ST'] != '': - urllib.request.urlretrieve(row['Maintenance ST'].replace(' ','%20'),(row['Maintenance ST'].split('/')[-1])) + if row['Certification Report URL'] != '': + try: + urllib.request.urlretrieve(row['Certification Report URL'].replace(' ','%20').replace(':443',''),(row['Certification Report URL'].split('/')[-1])) + except ConnectionResetError: + print('Connection reset by server. Continuing') + if row['Security Target URL'] != '': + try: + urllib.request.urlretrieve(row['Security Target URL'].replace(' ','%20').replace(':443',''),(row['Security Target URL'].split('/')[-1])) + except ConnectionResetError: + print('Connection reset by server. Continuing') + if row['Maintenance Report'] != '': + try: + urllib.request.urlretrieve(row['Maintenance Report'].replace(' ','%20').replace(':443',''),(row['Maintenance Report'].split('/')[-1])) + except ConnectionResetError: + print('Connection reset by server. Continuing') + if row['Maintenance ST'] != '': + try: + urllib.request.urlretrieve(row['Maintenance ST'].replace(' ','%20').replace(':443',''),(row['Maintenance ST'].split('/')[-1])) + except ConnectionResetError: + print('Connection reset by server. Continuing') \ No newline at end of file -- GitLab