-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfetchpopular.py
More file actions
120 lines (106 loc) · 2.88 KB
/
Copy pathfetchpopular.py
File metadata and controls
120 lines (106 loc) · 2.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# -*- coding: utf-8 -*-
import urllib
import bs4
import requests
import textrank
# Root URL of the site being scraped; main() also prepends it to the
# article hrefs stored in link_list.
setpage = "https://www.wired.com/"
# Article hrefs collected by get_most_popular(), in the order they are listed.
link_list = []
def remove_related(soup):
    """Query *soup* for the embedded related-article list items.

    NOTE: this is a placeholder. The matching ``<li>`` elements are looked
    up but nothing is removed from the tree and nothing is returned.
    """
    related_class = {"class": "article-list-item-embed-component__post"}
    # Result is intentionally unused for now (see NOTE above).
    related_items = soup.find_all("li", related_class)
def remove_refs(input_string):
    """Strip bracketed references such as ``[1]`` from *input_string*.

    A reference starts at a ``[`` and ends at the next ``]``; everything in
    between (including further ``[`` characters) is dropped.  A ``[`` that is
    never closed is left in place, as is a stray ``]``.

    When at least one reference was removed, runs of whitespace in the result
    are collapsed to single spaces and leading/trailing whitespace is
    stripped.  A string without any complete reference is returned unchanged.

    Args:
        input_string: Text to clean.

    Returns:
        The cleaned text.
    """
    kept = []          # characters that survive
    pending = []       # characters of the bracket span currently open
    removed_any = False

    # Single pass: deleting spans while scanning avoids searching for each
    # span in an already-modified copy, which misses spans whose whitespace
    # was altered by an earlier clean-up.
    for char in input_string:
        if char == "[":
            pending.append(char)
        elif pending:
            pending.append(char)
            if char == "]":
                pending.clear()
                removed_any = True
        else:
            kept.append(char)

    # An unterminated "[..." span is not a reference; put it back.
    kept.extend(pending)
    result = "".join(kept)
    if removed_any:
        result = " ".join(result.split())
    return result
def remove_trail(txt, delim):
    """Return *txt* cut off just before the last occurrence of *delim*.

    If *delim* does not occur in *txt*, *txt* is returned unchanged.
    """
    pieces = txt.rsplit(delim, 1)
    return pieces[0]
def create_summary(page):
    """Download an article, summarise its text and print title and summary.

    The page is fetched with ``urllib.request.urlopen`` and parsed with
    BeautifulSoup's ``html.parser``.  The text of every ``<p>`` element is
    concatenated, skipping any paragraph that contains
    "Use of this site constitutes".  Bracketed references are stripped with
    ``remove_refs`` and the result is handed to
    ``textrank.extract_sentences``.  Whatever follows the last "." of the
    summary is dropped and a single "." is appended.

    Args:
        page: URL of the article to summarise.

    Returns:
        None.  The title and summary are printed to stdout.
    """
    # Imported here so the submodule is guaranteed to be loaded: a bare
    # ``import urllib`` does not load ``urllib.request`` by itself.
    import urllib.request

    # The soup is fully built inside the ``with`` block, so the HTTP
    # response can be closed as soon as parsing is done.
    with urllib.request.urlopen(page) as resp:
        soup = bs4.BeautifulSoup(resp, 'html.parser')

    # Pages without an <h1> get an empty title instead of an AttributeError.
    heading = soup.find('h1')
    title = heading.text if heading is not None else ""

    text = "".join(
        paragraph.text
        for paragraph in soup.find_all('p')
        if "Use of this site constitutes" not in paragraph.text
    )
    text = remove_refs(text)

    # NOTE(review): assumes textrank.extract_sentences returns a str -- confirm.
    summary = textrank.extract_sentences(text)
    summary = remove_trail(summary, ".") + "."

    print('\n', title, '\n')
    print(summary)
def getsoup():
    """Download the page at ``setpage`` and return it as a BeautifulSoup tree.

    Returns:
        A ``bs4.BeautifulSoup`` object built with the ``html.parser`` backend.
    """
    # Imported here so the submodule is guaranteed to be loaded: a bare
    # ``import urllib`` does not load ``urllib.request`` by itself.
    import urllib.request

    # Parsing happens inside the ``with`` block so the HTTP response is
    # closed once the tree has been built.
    with urllib.request.urlopen(setpage) as resp:
        return bs4.BeautifulSoup(resp, 'html.parser')
def remove_section(tag):
    """Strip the leading run of lowercase characters from *tag*.

    The lowercase run at the very start of the string (presumably the
    section label glued in front of the headline -- verify against the page
    markup) is cut off.  Only that prefix is removed: the same word
    appearing later in the text is kept, and a string that does not start
    with a lowercase character is returned unchanged.

    Args:
        tag: Text of a listing entry.

    Returns:
        *tag* without its leading lowercase prefix.
    """
    # Index of the first character that is not lowercase (or the full
    # length when every character is lowercase).
    prefix_len = next(
        (index for index, char in enumerate(tag) if not char.islower()),
        len(tag),
    )
    return tag[prefix_len:]
def remove_trail(txt, delim):
    """Drop the last occurrence of *delim* in *txt* and everything after it.

    *txt* is returned as-is when it does not contain *delim*.

    NOTE: this redefines an identical ``remove_trail`` that appears earlier
    in the module; one of the two definitions is redundant.
    """
    head, *_ = txt.rsplit(delim, 1)
    return head
def create_link(website, url):
    """Return *website* with *url* appended to it."""
    return website + url
def get_most_popular():
    """List up to five entries from the front page and ask the user to pick.

    The front page is fetched via ``getsoup``.  For each of the first five
    ``post-listing-list-item`` entries the link's ``href`` is stored in the
    module-level ``link_list`` and the cleaned entry text (leading lowercase
    prefix removed, everything from the last "Author" onwards removed) is
    printed as a numbered menu line.

    Returns:
        A ``(selection, link_names)`` tuple: the raw string the user typed
        at the prompt and the list of cleaned entry texts, in menu order.
    """
    soup = getsoup()
    mostpopular = soup.find_all("li", {"class": "post-listing-list-item__post"})
    links = soup.find_all("a", {"class": "post-listing-list-item__link"})
    link_number = 5
    link_names = []

    # Start from an empty list so positions in link_list always line up with
    # the names returned by this call, even if it is called more than once.
    del link_list[:]

    # zip() stops at the shorter sequence, so a page with fewer than
    # link_number entries produces a shorter menu instead of an IndexError.
    entries = zip(links[:link_number], mostpopular[:link_number])
    for number, (link, item) in enumerate(entries, start=1):
        link_list.append(link.get('href'))
        text = remove_section(item.text)
        text = remove_trail(text, 'Author')
        link_names.append(text)
        print('[', number, ']', text)

    selection = input("\nSelect article by # >")
    return selection, link_names
def _article_index(raw, count):
    """Convert a 1-based menu entry to a 0-based index, or None if invalid."""
    try:
        number = int(raw)
    except ValueError:
        return None
    if 1 <= number <= count:
        return number - 1
    return None


def main():
    """Run the interactive summarizer loop.

    Prints the welcome banner, shows the article menu via
    ``get_most_popular`` and then processes commands until the user quits:

    * ``h`` -- print the help text.
    * ``q`` -- print "Quitting.." and exit by raising ``SystemExit``.
    * ``o`` -- ask for an article number and open that article in the
      default web browser.
    * an article number -- print a summary of that article via
      ``create_summary``, then re-print the menu.

    Anything else (non-numeric text, empty input, a number outside the
    menu) prints "Not a valid command! Enter q to quit!".  Every path asks
    for the next command afterwards, so invalid input can neither loop
    forever nor crash the program.
    """
    import webbrowser
    from urllib.parse import urljoin

    print("\nWelcome to the WIRED summarizer!\nChoose an article to be summarized (or enter h for help):")
    choice, names = get_most_popular()

    while True:
        if choice == 'h':
            print("1-5 to summarize an article\no to open the article\nq to quit")
        elif choice == 'q':
            print("Quitting..")
            # Same effect as exit(), without relying on the site module.
            raise SystemExit
        elif choice == 'o':
            special = input("Enter the article #>")
            index = _article_index(special, len(names))
            if index is None:
                print("Not a valid command! Enter q to quit!")
            else:
                # urljoin copes with relative, root-relative and absolute hrefs.
                webbrowser.open(urljoin(setpage, link_list[index]))
        else:
            index = _article_index(choice, len(names))
            if index is None:
                print("Not a valid command! Enter q to quit!")
            else:
                create_summary(urljoin(setpage, link_list[index]))
                print("\n")
                for number, name in enumerate(names, start=1):
                    print('[', number, ']', name)

        choice = input("Select article by # >")
# Start the interactive summarizer. There is no __main__ guard, so this
# also runs when the module is imported.
main()