diff --git a/science_access/crawl.py b/science_access/crawl.py
index 39f420bc5093068f8fee1db0c767364e6bf23152..f0c0667c548fe6cfeaf488f08ed65a168f37b1d5 100644
--- a/science_access/crawl.py
+++ b/science_access/crawl.py
@@ -204,5 +204,8 @@ def collect_pubs(url):
         check_out = link.get('href')
         #if '/citations?' in check_out:
         links.append(check_out)
-
+    driver.close()
+    driver.quit()
+    driver = None
+    del driver
     return links
diff --git a/science_access/online_app_backend.py b/science_access/online_app_backend.py
index d1db3a18a8c17a9e155103d0ffcd419250ccdb4a..c5f1b61f39405a41905ee3bcda43495f91bf2a00 100644
--- a/science_access/online_app_backend.py
+++ b/science_access/online_app_backend.py
@@ -82,15 +82,15 @@ def take_url_from_gui(author_link_scholar_link_list):
     follow_links = collect_pubs(author_link_scholar_link_list)[0:12]
     for r in tqdm(follow_links,title='Progess of scraping'):
-        if heroku:
-            sleep(np.random.uniform(1,3))
+        #if heroku:
+        #    sleep(np.random.uniform(1,3))
         try:
             urlDat = process(r)
         except:
             follow_more_links = collect_pubs(r)
             for r in tqdm(follow_more_links,title='Progess of scraping'):
-                if heroku:
-                    sleep(np.random.uniform(1,3))
+                #if heroku:
+                #    sleep(np.random.uniform(1,3))
                 urlDat = process(r)
         if not isinstance(urlDat,type(None)):
             author_results.append(urlDat)
diff --git a/science_access/scrape.py b/science_access/scrape.py
index ce122bd54c15746343bcb49c089fa3b3d0b7dad0..2eb08944540cb6f6cb6d7df23b76a1b742899ed7 100644
--- a/science_access/scrape.py
+++ b/science_access/scrape.py
@@ -55,6 +55,7 @@ if 'DYNO' in os.environ:
     heroku = False
 else:
     heroku = True
+'''
 def get_driver():
     if 'DYNO' in os.environ:
         heroku = True
@@ -98,7 +99,7 @@ def get_driver():
 
 
 
 driver = get_driver()
-
+'''
 rsrcmgr = PDFResourceManager()
 retstr = StringIO()