diff --git a/_author_specificDavid Grayden.p b/_author_specificDavid Grayden.p deleted file mode 100644 index aa98c2b47807f91863d3883b02d5d0f896d90ab6..0000000000000000000000000000000000000000 Binary files a/_author_specificDavid Grayden.p and /dev/null differ diff --git a/_author_specificSayali Phatak.p b/_author_specificSayali Phatak.p new file mode 100644 index 0000000000000000000000000000000000000000..970a1fedf27d9677bba0533d1c1e7d1b8ac415b4 Binary files /dev/null and b/_author_specificSayali Phatak.p differ diff --git a/entry_point.py b/entry_point.py index b9a6c64a079a806979a619300e114e3e7bad8bad..88209c78e0029b3246cf737509292b7f12c740cf 100644 --- a/entry_point.py +++ b/entry_point.py @@ -44,11 +44,11 @@ df0 = pd.DataFrame(lods) colors = px.colors.diverging.Portland colors = [colors[0], colors[1]] -author_name = st.text_input('Please Enter the scholar Author you would like to search for:') +author_name = st.text_input('Enter Scholarly Author:') if author_name: ar = call_from_front_end(author_name) standard_sci = [ t['standard'] for t in ar ] - group_labels = ['Author Scraped']#, 'Group 2', 'Group 3'] + group_labels = ['Author: '+str(author_name)]#, 'Group 2', 'Group 3'] scraped_labels = [ str(x['link']) for x in ar] @@ -62,41 +62,41 @@ if author_name: marginal="rug",# marginal='violin',# or violin, rug hover_data=df.columns) - fig0.update_layout(title_text='Scholar scraped Author Versus Art Corpus',width=900, height=900)#, hovermode='x') + fig0.update_layout(title_text='Scholar scraped {0} Versus Art Corpus'.format(author_name),width=900, height=900)#, hovermode='x') st.write(fig0) else: try: - with open('_author_specificR Gerkin.p','rb') as f: contents = pickle.load(f) + with open('_author_specificSayali Phatak.p','rb') as f: contents = pickle.load(f) except: - with open('more_authors_results.p','rb') as f: contents = pickle.load(f) - (NAME,ar,df,datay,scholar_link) = contents - (ar, trainingDats) = ar_manipulation(ar) - standard_sci = [ t['standard'] for t 
in ar ] - - scraped_labels = [ str(x['link']) for x in ar] - group_labels = ['Author Scraped']#, 'Group 2', 'Group 3'] - #colors = ['#393E46', '#2BCDC1', '#F66095'] - - #fig = ff.create_distplot([standard_sci], group_labels, colors=colors, - # bin_size=[0.3, 0.2, 0.1], show_curve=True) - - lods = [] - for i,j,k in zip(standard_sci,[str('a Previous Scrape') for i in range(0,len(ar))],scraped_labels): - lods.append({'Reading_Level':i,'Origin':j,'Web_Link':k}) - df1 = pd.DataFrame(lods) - df = pd.concat([df1,df0]) - #colors = [colors[0], colors[1]] - - fig0 = px.histogram(df, x="Reading_Level", y="Web_Link", color="Origin", - marginal="rug",# marginal='violin',# or violin, rug - hover_data=df.columns) - - fig0.update_layout(title_text='Scholar scraped Author Versus Art Corpus',width=900, height=900)#, hovermode='x') - - st.write(fig0) + with open('_author_specificDavid Grayden.p','rb') as f: contents = pickle.load(f) + (NAME,ar,df,datay,scholar_link) = contents + (ar, trainingDats) = ar_manipulation(ar) + standard_sci = [ t['standard'] for t in ar ] + + scraped_labels = [ str(x['link']) for x in ar] + group_labels = ['Author Scraped']#, 'Group 2', 'Group 3'] + #colors = ['#393E46', '#2BCDC1', '#F66095'] + + #fig = ff.create_distplot([standard_sci], group_labels, colors=colors, + # bin_size=[0.3, 0.2, 0.1], show_curve=True) + + lods = [] + for i,j,k in zip(standard_sci,[str('S Phatak') for i in range(0,len(ar))],scraped_labels): + lods.append({'Reading_Level':i,'Origin':j,'Web_Link':k}) + df1 = pd.DataFrame(lods) + df = pd.concat([df1,df0]) + #colors = [colors[0], colors[1]] + + fig0 = px.histogram(df, x="Reading_Level", y="Web_Link", color="Origin", + marginal="rug",# marginal='violin',# or violin, rug + hover_data=df.columns) + + fig0.update_layout(title_text='Scholar S Phatak Versus Art Corpus',width=900, height=900)#, hovermode='x') + + st.write(fig0) st.text('number scraped documents: {0}'.format(len(ar))) diff --git a/online_app_backend.py b/online_app_backend.py 
index 54a5339c7e2b41e97efe6e3b20ece3e30b6e5eb8..d0bcc1efb38c68b2e0dbbae7eb6b26ac0541a143 100644 --- a/online_app_backend.py +++ b/online_app_backend.py @@ -73,13 +73,12 @@ def take_url_from_gui(author_link_scholar_link_list): ''' author_results = [] follow_links = collect_pubs(author_link_scholar_link_list)[0:10] - for r in tqdm(follow_links,title='Approx N. Documents scrapped'): + for r in tqdm(follow_links,title='Progress of scraping'): try: urlDat = process(r) - except: follow_more_links = collect_pubs(r) - for r in tqdm(follow_more_links,title='Approx N. Documents scrapped'): + for r in tqdm(follow_more_links,title='Progress of scraping'): urlDat = process(r)