from time import sleep
from bs4 import BeautifulSoup
import requests


def get_page(url, start=0):
    try:
        response = requests.get(url.format(str(start)))
    except requests.RequestException:
        return None
    return response


def find_links(html_doc):
    soup = BeautifulSoup(html_doc, features='html.parser')
    # in this line you have to change the class attrs so the links are recognised correctly
    return soup.find_all('a', attrs={'class': 'result-title hdrlnk'})


def start_crawl(url):
    start = 0
    crawl = True
    adv_links = list()
    while crawl:
        response = get_page(url, start)
        if response is None:
            break
        # in the second step you should set an explicit wait so the page loads completely
        sleep(2)
        new_links = find_links(response.text)
        adv_links.extend(new_links)
        print(adv_links)
        # Craigslist paginates search results 120 per page via the "s" query parameter
        start += 120
        crawl = bool(len(new_links))
    return adv_links
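

# A minimal sketch of post-processing the crawl result (not part of the original
# script): find_links() returns bs4 Tag objects, so the listing title and URL
# still have to be pulled out of each tag. The extract_listings name is only
# illustrative.
def extract_listings(tags):
    listings = []
    for tag in tags:
        listings.append({
            'title': tag.get_text(strip=True),
            'url': tag.get('href'),
        })
    return listings

# Example usage, e.g. after start_crawl() has returned:
#     for item in extract_listings(links):
#         print(item['title'], item['url'])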


if __name__ == "__main__":
    # then you have to change the site's link to this one; note the "{}" placeholder
    # at the end so url.format() can fill in the pagination offset
    link = "https://paris.craigslist.org/search/hhh?availabilityMode=0&sale_date=all+data&s={}"
    links = start_crawl(link)
    print(links)