Current location - Education and Training Encyclopedia - Graduation thesis - Python crawler replaces the links in the online html page with local links and saves the html file.
Python crawler: replace the links in a downloaded HTML page with local links and save the resulting HTML file.
import os
import re

def check_flag(flag: str) -> bool:
    """Return True when *flag* is a path that begins with ``images/``.

    Only a match at the very start of the string counts, so
    ``"images/a.png"`` qualifies while ``"/images/a.png"`` and
    ``"static/images/a.png"`` do not.

    Args:
        flag: The attribute value (for example an ``<img>`` ``src``) to test.

    Returns:
        ``True`` if *flag* starts with ``images/``, otherwise ``False``.
    """
    # A fixed-prefix test needs no regular expression; str.startswith gives
    # the same start-anchored check without compiling a pattern per call.
    return flag.startswith("images/")

# Rewrite the <img> links of a saved HTML page so that they point at a local
# directory, then write the modified document to index.html.
from bs4 import BeautifulSoup

# Small sample markup kept for experimenting with the parser; the script
# below does not read it.
html_content = '''
<a href="">Test 01</a>
<a href="/123">Test 02</a>
<a href="">Test 01</a>
<a href="">Test 01</a>
'''

# Parse inside a ``with`` block so the input file is closed as soon as the
# document tree has been built.
with open(r'favor-en.html', 'r', encoding="UTF-8") as file:
    soup = BeautifulSoup(file, "html.parser")

for element in soup.find_all('img'):
    # Skip <img> tags that carry no src attribute at all.
    if "src" not in element.attrs:
        continue

    src = element.attrs['src']
    print(src)

    # Only paths that check_flag accepts (those starting with "images/")
    # are redirected; every other src is left untouched.
    if check_flag(src):
        element.attrs['src'] = "michenxxxxxxxxxxxx" + '/' + src
        print("####################")

with open('index.html', 'w', encoding="UTF-8") as fp:
    # prettify() re-indents the tree so the saved HTML is human-readable.
    fp.write(soup.prettify())