diff --git a/scrape.py b/scrape.py index d9767042c6fbb51b8dd2b3f2b95556c3992970f9..8dc21be0a3ab6fa56e8bd7f7640e81b5fbbd3d26 100644 --- a/scrape.py +++ b/scrape.py @@ -49,14 +49,28 @@ import urllib.request from io import StringIO import io from selenium import webdriver -from selenium.webdriver.firefox.options import Options +#from selenium.webdriver.firefox.options import Options from selenium.common.exceptions import NoSuchElementException + + +from selenium import webdriver +import os + +chrome_options = webdriver.ChromeOptions() +chrome_options.binary_location = os.environ.get("GOOGLE_CHROME_BIN") +chrome_options.add_argument("--headless") +chrome_options.add_argument("--disable-dev-shm-usage") +chrome_options.add_argument("--no-sandbox") +driver = webdriver.Chrome(executable_path=os.environ.get("CHROMEDRIVER_PATH"), chrome_options=chrome_options) + +''' os.system("which firefox") options = Options() #options.headless = True options.add_argument("--headless") import os + try: driver = webdriver.Firefox(options=options) @@ -75,7 +89,7 @@ except: #os.system("tar xvf firefox-45.0.2.tar.bz2") GECKODRIVER_PATH=str(os.getcwd())+str("/geckodriver") driver = webdriver.Firefox(options=options,executable_path=GECKODRIVER_PATH) - +''' rsrcmgr = PDFResourceManager() diff --git a/setup.sh b/setup.sh index 85a3983a16ffef2211c8b05536f4dc764eead010..3931924486721f25f6034b126cd35e7ad2757168 100644 --- a/setup.sh +++ b/setup.sh @@ -4,16 +4,7 @@ # download and install latest geckodriver for linux or mac. # required for selenium to drive a firefox browser. sudo apt-get update -sudo apt-get install jq wget firefox - -json=$(curl -s https://api.github.com/repos/mozilla/geckodriver/releases/latest) -url=$(echo "$json" | jq -r '.assets[].browser_download_url | select(contains("linux64"))') -curl -s -L "$url" | tar -xz -chmod +x geckodriver -sudo cp geckodriver . -sudo cp geckodriver ./app -export PATH=$PATH:$pwd/geckodriver -echo PATH +sudo apt-get install jq wget chromium-chromedriver sudo python3 -m pip install -r requirements.txt @@ -30,15 +21,6 @@ wget https://www.dropbox.com/s/crarli3772rf3lj/more_authors_results.p?dl=0 wget https://www.dropbox.com/s/x66zf52himmp5ox/benchmarks.p?dl=0 # sudo apt-get install -y firefox wget https://ftp.mozilla.org/pub/firefox/releases/45.0.2/linux-x86_64/en-GB/firefox-45.0.2.tar.bz2 -tar xvf firefox-45.0.2.tar.bz2 -sudo mv /usr/bin/firefox /usr/bin/firefox-backup -rm /usr/bin/firefox -sudo mv firefox/ /usr/lib/firefox -sudo ln -s /usr/lib/firefox /usr/bin/firefox - - - -which firefox mkdir -p ~/.streamlit/ echo "\