from time import sleep
from bs4 import BeautifulSoup
import requests


def get_page(url, start=0):
    try:
        response = requests.get(url.format(str(start)))
    except requests.RequestException:
        return None
    return response


def find_links(html_doc):
    soup = BeautifulSoup(html_doc, features='html.parser')
    # in this line you have to change the class attrs so the links are recognised correctly
    return soup.find_all('a', attrs={'class': 'result-title hdrlnk'})


def start_crawl(url):
    start = 0
    crawl = True
    adv_links = list()
    while crawl:
        response = get_page(url, start)
        if response is None:
            break
        # in the second step you should set an explicit wait so the page loads completely
        sleep(2)
        new_links = find_links(response.text)
        adv_links.extend(new_links)
        print(adv_links)
        # Craigslist paginates search results 120 per page via the "s" query parameter
        start += 120
        crawl = bool(len(new_links))
    return adv_links
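

# A minimal sketch of post-processing the crawl result (not part of the original
# script): find_links() returns bs4 Tag objects, so the listing title and URL
# still have to be pulled out of each tag. The extract_listings name is only
# illustrative.
def extract_listings(tags):
    listings = []
    for tag in tags:
        listings.append({
            'title': tag.get_text(strip=True),
            'url': tag.get('href'),
        })
    return listings

# Example usage, e.g. after start_crawl() has returned:
#     for item in extract_listings(links):
#         print(item['title'], item['url'])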


if __name__ == "__main__":
    # then you have to change the site's link to this one; note the "{}" placeholder
    # at the end so url.format() can fill in the pagination offset
    link = "https://paris.craigslist.org/search/hhh?availabilityMode=0&sale_date=all+data&s={}"
    links = start_crawl(link)
    print(links)