diff --git a/.binder/postBuild b/.binder/postBuild
new file mode 100644
index 0000000000000000000000000000000000000000..e53d6b0b895eba838307faf2f3ebb3e7fbb586a6
--- /dev/null
+++ b/.binder/postBuild
@@ -0,0 +1,7 @@
+#!/bin/bash
+# enable nbserverproxy
+jupyter serverextension enable --sys-prefix nbserverproxy
+# streamlit launches at startup
+mv .binder/streamlit_call.py ${NB_PYTHON_PREFIX}/lib/python*/site-packages/
+# enable streamlit extension
+jupyter serverextension enable --sys-prefix streamlit_call
\ No newline at end of file
diff --git a/.binder/requirements.txt b/.binder/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5932971dc8f774666ded5d32e426c3d4d619d093
--- /dev/null
+++ b/.binder/requirements.txt
@@ -0,0 +1,14 @@
+PyPDF2
+pycld2
+nltk
+selenium
+delver
+pdfminer
+pyvirtualdisplay
+textstat
+fsspec>=0.3.3
+textblob
+twython
+streamlit==0.52.2
+jupyter-server-proxy==1.2.0
+nbserverproxy==0.8.8
\ No newline at end of file
diff --git a/.binder/streamlit_call.py b/.binder/streamlit_call.py
new file mode 100644
index 0000000000000000000000000000000000000000..81217c61fa200badab8a33111b5fcb57d9654030
--- /dev/null
+++ b/.binder/streamlit_call.py
@@ -0,0 +1,6 @@
+
+from subprocess import Popen
+
+def load_jupyter_server_extension(nbapp):
+    """serve the streamlit app"""
+    Popen(["streamlit", "run", "../entry_point.py", "--browser.serverAddress=0.0.0.0", "--server.enableCORS=False"])
\ No newline at end of file
diff --git a/README.md b/README.md
index 66e4e01f22bb1517642340f5c02a5a736f68875a..f7c304041d31988c0f71c301156dd475d5ff50ef 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ sudo bash install.sh
 streamlit run entry_point.py
 ```
 
-**![Example Screen Shot](example_app.png)**
+**![Example Screen Shot](data/example_app.png)**
 
 
 [![Build Status](https://travis-ci.com/russelljjarvis/ScienceAccessibility.png)](https://travis-ci.com/russelljjarvis/ScienceAccessibility) 
diff --git a/data/_author_specificDavid Grayden.p b/data/_author_specificDavid Grayden.p
new file mode 100644
index 0000000000000000000000000000000000000000..aa98c2b47807f91863d3883b02d5d0f896d90ab6
Binary files /dev/null and b/data/_author_specificDavid Grayden.p differ
diff --git a/data/_author_specificSayali Phatak.p b/data/_author_specificSayali Phatak.p
new file mode 100644
index 0000000000000000000000000000000000000000..970a1fedf27d9677bba0533d1c1e7d1b8ac415b4
Binary files /dev/null and b/data/_author_specificSayali Phatak.p differ
diff --git a/data/example_app.png b/data/example_app.png
new file mode 100644
index 0000000000000000000000000000000000000000..be5f3cc1cefdc74832a328aa15a9d83e371c9d83
Binary files /dev/null and b/data/example_app.png differ
diff --git a/data/more_authors_results.p b/data/more_authors_results.p
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/entry_point.py b/entry_point.py
index ee6120073cd24d810c19cb3a0ec1e303866b5ab3..6b11e69d4a0bc5597c6ec296843bc4754e133afe 100644
--- a/entry_point.py
+++ b/entry_point.py
@@ -20,19 +20,17 @@ except:
     nltk.download('punkt')
     nltk.download('stopwords')
 
-if not(os.path.exists('traingDats.p?dl=0') or os.path.exists('traingDats.p')):
+if not(os.path.exists('traingDats.p?dl=0') or os.path.exists('data/traingDats.p')):
 
     os.system('wget https://www.dropbox.com/s/3h12l5y2pn49c80/traingDats.p?dl=0')
-    os.system('wget https://www.dropbox.com/s/crarli3772rf3lj/more_authors_results.p?dl=0')
     os.system('wget https://www.dropbox.com/s/x66zf52himmp5ox/benchmarks.p?dl=0')
 
-if os.path.exists("traingDats.p?dl=0") and not os.path.exists("traingDats.p"):
-    os.system('mv traingDats.p?dl=0 traingDats.p')
-    os.system('mv benchmarks.p?dl=0 benchmarks.p')
-    os.system('mv more_authors_results.p?dl=0 more_authors_results.p')
+if os.path.exists("traingDats.p?dl=0") and not os.path.exists("data/traingDats.p"):
+    os.system('mv traingDats.p?dl=0 data/traingDats.p')
+    os.system('mv benchmarks.p?dl=0 data/benchmarks.p')
 
 
-trainingDats = pickle.load(open('traingDats.p','rb'))
+trainingDats = pickle.load(open('data/traingDats.p','rb'))
 bio_chem = [ t['standard'] for t in trainingDats ]
 biochem_labels =  [ x['file_name'] for x in trainingDats if 'file_name' in x.keys()]
 biochem_labels = [x.split("/")[-1] for x in biochem_labels ]
@@ -44,8 +42,16 @@ df0 = pd.DataFrame(lods)
 
 theme = px.colors.diverging.Portland
 colors = [theme[0], theme[1]]
-st.title('Search Reading Difficulty of Academic Author')
+st.title('Search Reading Difficulty of Academic')
 author_name = st.text_input('Enter Author:')
+def make_clickable(link):
+    # target _blank to open new window
+    # extract clickable text to display for your link
+    text = link#.split('=')[1]
+    return f'<a target="_blank" href="{link}">{text}</a>'
+
+    
+
 if author_name:
     ar = call_from_front_end(author_name)
     standard_sci = [ t['standard'] for t in ar ]
@@ -63,9 +69,11 @@ if author_name:
     #                marginal="rug",# marginal='violin',# or violin, rug
     #                hover_data=df.columns)
     fig0 = px.histogram(df, x="Reading_Level", y="Web_Link", color="Origin",
-                    marginal="violin",
+                    marginal="box",
                     opacity=0.7,# marginal='violin',# or violin, rug
-                    hover_data=df.columns, color_discrete_sequence=colors)
+                    hover_data=df.columns,
+                    hover_name=df["Web_Link"],
+                    color_discrete_sequence=colors)
 
     fig0.update_layout(title_text='Scholar scraped {0} Versus Art Corpus'.format(author_name),width=900, height=900)#, hovermode='x')
             
@@ -73,10 +81,8 @@ if author_name:
 
 
 else:   
-    try:
-        with open('_author_specificSayali Phatak.p','rb') as f: contents = pickle.load(f)   
-    except:
-        with open('_author_specificDavid Grayden.p','rb') as f: contents = pickle.load(f)
+    with open('data/_author_specificSayali Phatak.p','rb') as f: 
+        contents = pickle.load(f)   
     (NAME,ar,df,datay,scholar_link) =  contents     
     (ar, trainingDats) = ar_manipulation(ar)
     standard_sci = [ t['standard'] for t in ar ]
@@ -93,41 +99,72 @@ else:
         lods.append({'Reading_Level':i,'Origin':j,'Web_Link':k})
     df1 = pd.DataFrame(lods)
     df = pd.concat([df1,df0])
+
+
+    #df['Web_Link'] = df['Web_Link'].apply(make_clickable)
+    #df = df.to_html(escape=False)
     
     #colors = [colors[0], colors[1]]
 
     fig0 = px.histogram(df, x="Reading_Level", y="Web_Link", color="Origin",
-                    marginal="rug",
+                    marginal="box",
                     opacity=0.7,# marginal='violin',# or violin, rug
                     hover_data=df.columns,
+                    hover_name=df["Web_Link"],
                     color_discrete_sequence=colors)
 
     fig0.update_layout(title_text='Scholar S Phatak Versus Art Corpus',width=900, height=600)#, hovermode='x')
             
     st.write(fig0)
 '''
+
 ### Total number scraped documents:
+
 '''
 st.text(len(ar))
 
+if np.mean(standard_sci) < np.mean(bio_chem):
+    '''
+
+
+    ### This author was easier to read as the average of ARTCORPUS:
+    A varied collection of biochemistry science papers
+    '''
+
+if np.mean(standard_sci) >= np.mean(bio_chem):
+    '''
 
 
+    ### This author was harder or just as hard to read as average of ARTCORPUS:
+    A varied collection of biochemistry science papers
+    '''
 
 
 
 
+df_links = pd.DataFrame()
+df_links['Web_Link'] = pd.Series(scraped_labels)
+df_links['Reading_Level'] = pd.Series(standard_sci)
+#st.write(df)
+# link is the column with hyperlinks
+df_links['Web_Link'] = df_links['Web_Link'].apply(make_clickable)
+df_links = df_links.to_html(escape=False)
+st.write(df_links, unsafe_allow_html=True)
+
 x1 = df0['Reading_Level']#np.random.randn(200)
 x2 = df1['Reading_Level']#np.random.randn(200) + 2
 if author_name:
     group_labels = ['Comparison Data ', str(author_name)]
 else:
-    group_labels = ['Comparison Data ', str('search_author')]
+    group_labels = ['Comparison Data ', str('S Phatak')]
 
 
 # Create distplot with curve_type set to 'normal'
 colors = [theme[-1], theme[-2]]
 
-rt=list(df['Web_Link'])
+#rt=list(df['Web_Link'])
+rt=list(pd.Series(scraped_labels))
+
 #st.text('number scraped documents: {0}'.format(rt))
 
 fig = ff.create_distplot([x1, x2], group_labels, bin_size=2,colors=colors,rug_text=rt)
@@ -139,11 +176,49 @@ fig.update_layout(width=900, height=600)#, hovermode='x')
 
 st.write(fig)
 
-#print(group_labels)
-#group_labels = ['Biochemistry Documents']#, 'Group 2', 'Group 3']
+list_df = pickle.load(open("data/benchmarks.p","rb")) 
+bm = pd.DataFrame(list_df)
+
+bm = bm.rename(columns={'link': 'Web_Link', 'standard': 'Reading_Level'})
+bm["Origin"] = pd.Series(["Benchmark" for i in range(0,len(bm))])
+#del bm.loc['nicholas']
+#del bm.loc['local_resource']
+#bm = bm.drop('nicholas', axis=0))
+bm = bm.drop(4, axis=0)
 
-#colors = ['#393E46']#, '#2BCDC1', '#F66095']
+bm_temp = pd.DataFrame()
+bm_temp["Origin"] = bm["Origin"]
+bm_temp["Web_Link"] = bm["Web_Link"]
+bm_temp["Reading_Level"] = bm["Reading_Level"]
+import copy
+bm = copy.copy(bm_temp)
 
-#fig = ff.create_distplot([standard_sci], group_labels, colors=colors,
-#                         bin_size=[0.3, 0.2, 0.1], show_curve=True)
+bm_temp['Web_Link'] = bm_temp['Web_Link'].apply(make_clickable)
+bm_temp = bm_temp.to_html(escape=False)
+st.write(bm_temp, unsafe_allow_html=True)
+
+x1 = bm['Reading_Level']
+x2 = df1['Reading_Level']
+
+x3 = df0['Reading_Level']
+
+
+rt=list(bm['Web_Link'])
+rt.extend(list(df1['Web_Link']))
+rt.extend(list(df0['Web_Link']))
+
+colors = [theme[0], theme[4],theme[2]]
+if author_name:
+    group_labels = ['Ideal Bench Marks ', str(author_name), str('Comparison Data')]
+else:
+    group_labels = ['Ideal Bench Marks ', str('S Phatak'), str('Comparison Data')]
+
+fig = ff.create_distplot([x1, x2, x3], group_labels, bin_size=1,colors=colors,rug_text=rt)
+
+hover_trace = [t for t in fig['data'] if 'text' in t]
+
+fig.update_layout(title_text='Benchmarks versus scraped Author')
+fig.update_layout(width=900, height=600)#, hovermode='x')
+
+st.write(fig)
 
diff --git a/install.sh b/install.sh
index f69ebdeb9ce20e96484ff3acd9eed3ae84d2c7f1..8cf2558f067fa40f9fa6c03de619678a9923761d 100644
--- a/install.sh
+++ b/install.sh
@@ -3,20 +3,21 @@
 #!/bin/bash
 # download and install latest geckodriver for linux or mac.
 # required for selenium to drive a firefox browser.
+pip=$(sudo which pip)
 sudo /home/user/anaconda3/bin/pip install -r requirements.txt
 
 sudo apt-get install jq
-sudo pip install PyPDF2
-sudo pip install pycld2
-sudo pip install nltk
-sudo pip install selenium
-sudo pip install delver
-sudo pip install pdfminer
-sudo pip install pyvirtualdisplay
-sudo pip install textstat
-sudo pip install fsspec>=0.3.3
-sudo pip install textblob
-sudo pip install twython
+sudo /home/user/anaconda3/bin/pip install PyPDF2
+sudo /home/user/anaconda3/bin/pip install pycld2
+sudo /home/user/anaconda3/bin/pip install nltk
+sudo /home/user/anaconda3/bin/pip install selenium
+sudo /home/user/anaconda3/bin/pip install delver
+sudo /home/user/anaconda3/bin/pip install pdfminer
+sudo /home/user/anaconda3/bin/pip install pyvirtualdisplay
+sudo /home/user/anaconda3/bin/pip install textstat
+sudo /home/user/anaconda3/bin/pip install "fsspec>=0.3.3"
+sudo /home/user/anaconda3/bin/pip install textblob
+sudo /home/user/anaconda3/bin/pip install twython
 sudo python3 -c "import nltk; nltk.download('punkt')"
 sudo python3 -c "import nltk; nltk.download('stopwords')"
 sudo bash gecko_install.sh
@@ -28,3 +29,4 @@ mv scholar.py ..
 wget https://www.dropbox.com/s/3h12l5y2pn49c80/traingDats.p?dl=0
 wget https://www.dropbox.com/s/crarli3772rf3lj/more_authors_results.p?dl=0
 wget https://www.dropbox.com/s/x66zf52himmp5ox/benchmarks.p?dl=0
+ 
\ No newline at end of file
diff --git a/online_app_backend.py b/online_app_backend.py
index d0bcc1efb38c68b2e0dbbae7eb6b26ac0541a143..3e8a5b86e2097020ede240190157c1b64d81922a 100644
--- a/online_app_backend.py
+++ b/online_app_backend.py
@@ -172,7 +172,7 @@ def ar_manipulation(ar):
     #with open(str('more_authors_results.p'),'wb') as f:
     #    pickle.dump([NAME,ar],f)
 
-    with open('traingDats.p','rb') as f:
+    with open('data/traingDats.p','rb') as f:
         trainingDats = pickle.load(f)
         
     trainingDats.extend(ar)
diff --git a/requirements.txt b/requirements.txt
index 829e47da3d38407dad777fd698a0ec47cb41b9e7..5932971dc8f774666ded5d32e426c3d4d619d093 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,3 +10,5 @@ fsspec>=0.3.3
 textblob
 twython
-streamlit
+streamlit==0.52.2
+jupyter-server-proxy==1.2.0
+nbserverproxy==0.8.8
\ No newline at end of file