From c16074413549b22ee3431ce60c33f5b56c2f3424 Mon Sep 17 00:00:00 2001
From: Janik Besendorf <janik@besendorf.org>
Date: Mon, 8 Feb 2021 13:25:26 +0100
Subject: [PATCH] fixes ConnectionResetError in cc-scraper and adds error
 handling

---
 common_criteria_scraper/cc_portal_scraper.py | 28 ++++++++++++++------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/common_criteria_scraper/cc_portal_scraper.py b/common_criteria_scraper/cc_portal_scraper.py
index 3cd2000..c9f36f7 100644
--- a/common_criteria_scraper/cc_portal_scraper.py
+++ b/common_criteria_scraper/cc_portal_scraper.py
@@ -32,14 +32,26 @@ with open ('filtered.csv', 'r') as filtered:
         sys.exit(1)
     csv_dict = csv.DictReader(filtered)
     for row in csv_dict:
-        if row['Certification Report URL'] != '': 
-            urllib.request.urlretrieve(row['Certification Report URL'].replace(' ','%20'),(row['Certification Report URL'].split('/')[-1]))
-        if row['Security Target URL'] != '': 
-            urllib.request.urlretrieve(row['Security Target URL'].replace(' ','%20'),(row['Security Target URL'].split('/')[-1]))
-        if row['Maintenance Report'] != '': 
-            urllib.request.urlretrieve(row['Maintenance Report'].replace(' ','%20'),(row['Maintenance Report'].split('/')[-1]))
-        if row['Maintenance ST'] != '': 
-            urllib.request.urlretrieve(row['Maintenance ST'].replace(' ','%20'),(row['Maintenance ST'].split('/')[-1]))
+        if row['Certification Report URL'] != '':
+            try:
+                urllib.request.urlretrieve(row['Certification Report URL'].replace(' ','%20').replace(':443',''),(row['Certification Report URL'].split('/')[-1]))
+            except ConnectionResetError:
+                print('Connection reset by server. Continuing')
+        if row['Security Target URL'] != '':
+            try:
+                urllib.request.urlretrieve(row['Security Target URL'].replace(' ','%20').replace(':443',''),(row['Security Target URL'].split('/')[-1]))
+            except ConnectionResetError:
+                print('Connection reset by server. Continuing')
+        if row['Maintenance Report'] != '':
+            try:
+                urllib.request.urlretrieve(row['Maintenance Report'].replace(' ','%20').replace(':443',''),(row['Maintenance Report'].split('/')[-1]))
+            except ConnectionResetError:
+                print('Connection reset by server. Continuing')
+        if row['Maintenance ST'] != '':
+            try:
+                urllib.request.urlretrieve(row['Maintenance ST'].replace(' ','%20').replace(':443',''),(row['Maintenance ST'].split('/')[-1]))
+            except ConnectionResetError:
+                print('Connection reset by server. Continuing')
             
 
         
\ No newline at end of file
-- 
GitLab