diff --git a/science_access/get_bmark_corpus.py b/science_access/get_bmark_corpus.py index 6fff696d2b473b8bf498fa4ef63a9b82b016b741..f8e69333acc0d9874eefdba83d4b0965b420c81f 100644 --- a/science_access/get_bmark_corpus.py +++ b/science_access/get_bmark_corpus.py @@ -37,6 +37,11 @@ def process(link): pdf_file = requests.get(link, stream=True) buffered = convert_pdf_to_txt(pdf_file) urlDat = text_proc(buffered,urlDat) + driver.close() + driver.quit() + driver = None + del driver + return urlDat #try: diff --git a/science_access/scrape.py b/science_access/scrape.py index 2eb08944540cb6f6cb6d7df23b76a1b742899ed7..57f71ae0ad71697fa3543b92d79b5028182e4590 100644 --- a/science_access/scrape.py +++ b/science_access/scrape.py @@ -55,7 +55,6 @@ if 'DYNO' in os.environ: heroku = False else: heroku = True -''' def get_driver(): if 'DYNO' in os.environ: heroku = True @@ -98,8 +97,7 @@ def get_driver(): return driver -driver = get_driver() -''' +#driver = get_driver() rsrcmgr = PDFResourceManager() retstr = StringIO()