ไม่สามารถขูดลิงก์และลิงก์ย่อยทั้งหมดจากหน้าเว็บได้

ฉันกำลังพยายามดึงลิงก์ทั้งหมดที่อยู่ใต้หมวด Automotive ในพื้นที่ด้านซ้ายของหน้าเว็บแบบเรียกซ้ำ (recursive)

จำเป็นต้องรวบรวมลิงก์ทั้งหมดแบบเรียกซ้ำ เนื่องจากแต่ละลิงก์ยังแตกแขนงเป็นลิงก์ย่อยอีก

ตัวอย่างเช่น หากคุณคลิกลิงก์ใดๆ ใต้ Automotive คุณมักจะเห็นว่ามีลิงก์ย่อยอีกสองสามลิงก์อยู่ใต้ลิงก์นั้น และเป็นเช่นนี้ต่อไปเรื่อยๆ

การใช้งานปัจจุบันของฉันดึงได้เฉพาะลิงก์จากหน้า Landing Page เท่านั้น

import requests
from bs4 import BeautifulSoup

# Landing page of the Amazon.de "Automotive" bestsellers list; passed to fetch_all_links as the page to scrape.
link = 'https://www.amazon.de/-/en/gp/bestsellers/automotive/ref=zg_bs_nav_0'

def fetch_all_links(link):
    """Yield the href of each sub-category link found on the page at *link*.

    The page is downloaded through the module-level session ``s`` and parsed
    with html5lib. Links are taken from the ``<ul>`` that immediately follows
    the ``<li>`` containing ``span.zg_selected``. Only this one page is
    inspected; the links that are found are not followed.
    """
    response = s.get(link)
    parsed = BeautifulSoup(response.text, "html5lib")
    anchors = parsed.select("li:has(> span.zg_selected) + ul > li > a[href]")
    # An empty selection simply produces an empty generator.
    for anchor in anchors:
        yield anchor.get("href")

if __name__ == '__main__':
    # fetch_all_links reads the session through the global name `s`, so the
    # session must be bound to exactly that name.
    with requests.Session() as s:
        # Send a browser-like User-Agent with every request on this session.
        s.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
        })
        for href in fetch_all_links(link):
            print(href)

คำถาม: ฉันจะดึงลิงก์และลิงก์ย่อยทั้งหมดจากพื้นที่ด้านซ้ายของหน้าเว็บดังกล่าวได้อย่างไร


person SMTH    schedule 10.10.2020    source แหล่งที่มา


คำตอบ (3)


โค้ดต่อไปนี้ให้ผลลัพธ์จำนวนมาก ซึ่งทั้งหมดไม่สามารถพิมพ์ได้เนื่องจากข้อจำกัด StackOverflow มีลิงก์ที่ซ้ำกันจำนวนมาก (รายการเดียวกันอยู่ในหมวดหมู่ที่ต่างกัน) เวอร์ชันที่สองจะไม่พิมพ์สำเนาที่ซ้ำกัน คุณตัดสินใจว่าคุณต้องการอันไหน คุณยังสามารถตัดสินใจว่าค่าใดเป็น yield ตามความต้องการของคุณ โค้ดนี้ยังใช้เธรดพูลเพื่ออ่านลิงก์ทั้งหมดในระดับที่กำหนดพร้อมกัน:

เวอร์ชัน 1 ที่มีลิงก์ซ้ำ

import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor
from functools import partial


def get_page(session, url):
    """Fetch *url* through *session* and return the response body as text."""
    return session.get(url).text


def get_links(session, executor, page, level):
    """Recursively yield ``(level, title, url)`` for the category links in *page*.

    *page* is the HTML text of an already downloaded page. Every link matched
    by the selector is reported at depth *level*, immediately followed by the
    links found on the page it points to (reported at ``level + 1``), so the
    output is in depth-first order.
    """
    anchors = BeautifulSoup(page, 'html5lib').select(
        'li:has(> span.zg_selected) + ul > li > a[href]'
    )
    if not anchors:
        return
    entries = [(anchor.text, anchor.get('href')) for anchor in anchors]
    # All pages linked from this level are requested through executor.map
    # up front; the list() call waits until every one of them has arrived.
    fetch = partial(get_page, session)
    child_pages = list(executor.map(fetch, [href for _, href in entries]))
    for (title, href), child_page in zip(entries, child_pages):
        yield level, title, href
        yield from get_links(session, executor, child_page, level + 1)

# Crawl from the Automotive bestsellers landing page and print every category
# link, indented four spaces per nesting level.
with requests.Session() as session, ThreadPoolExecutor(max_workers=50) as executor:
    page = session.get('https://www.amazon.de/-/en/gp/bestsellers/automotive/ref=zg_bs_nav_0').text
    for level, title, href in get_links(session, executor, page, 0):
        spacing = ' ' * (4 * level)
        print(f'{spacing}{title}: {href}')

เวอร์ชัน 2 ที่ไม่มีลิงก์ซ้ำ

import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor
from functools import partial


def get_page(session, url):
    """Download *url* with *session* and hand back the body text of the response."""
    response = session.get(url)
    body = response.text
    return body

# URLs that get_links has already picked up; shared by all recursive calls so each URL is fetched and reported only once.
seen_links = set()

def get_links(session, executor, page, level):
    """Recursively yield ``(level, title, url)`` for links not seen before.

    *page* is the HTML text of an already downloaded page. Links whose URL is
    already recorded in the module-level ``seen_links`` set are ignored: they
    are neither reported nor fetched. Each new link is reported at depth
    *level*, immediately followed by the new links found on the page it
    points to (reported at ``level + 1``).
    """
    anchors = BeautifulSoup(page, 'html5lib').select(
        'li:has(> span.zg_selected) + ul > li > a[href]'
    )
    if not anchors:
        return
    fresh = []  # (title, url) pairs encountered for the first time
    for anchor in anchors:
        href = anchor.get('href')
        if href in seen_links:
            continue
        seen_links.add(href)
        fresh.append((anchor.text, href))
    # Request every newly discovered page through executor.map before
    # descending; list() waits for all of the results.
    fetch = partial(get_page, session)
    child_pages = list(executor.map(fetch, [href for _, href in fresh]))
    for (title, href), child_page in zip(fresh, child_pages):
        yield level, title, href
        yield from get_links(session, executor, child_page, level + 1)

# Crawl from the Automotive bestsellers landing page and print each link that
# get_links reports, indented four spaces per nesting level.
with requests.Session() as session, ThreadPoolExecutor(max_workers=50) as executor:
    page = session.get('https://www.amazon.de/-/en/gp/bestsellers/automotive/ref=zg_bs_nav_0').text
    for level, title, href in get_links(session, executor, page, 0):
        spacing = ' ' * (4 * level)
        print(f'{spacing}{title}: {href}')

ผลลัพธ์ที่พิมพ์ (แสดงเพียงบางส่วนเนื่องจากข้อจำกัดของ StackOverflow):

Car Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/79919031
    Alternators: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504415031
    Batteries & Accessories: https://www.amazon.de/-/en/gp/bestsellers/automotive/82523031
        Battery Charging Units: https://www.amazon.de/-/en/gp/bestsellers/automotive/82526031
        Battery Switches: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933113031
        Battery Testers: https://www.amazon.de/-/en/gp/bestsellers/automotive/82527031
        Car Batteries: https://www.amazon.de/-/en/gp/bestsellers/automotive/82524031
        Jump Leads: https://www.amazon.de/-/en/gp/bestsellers/automotive/82525031
    Belts & Tensioners: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504430031
        Idler Pulleys: https://www.amazon.de/-/en/gp/bestsellers/automotive/8490903031
        Serpentine Belts: https://www.amazon.de/-/en/gp/bestsellers/automotive/14343385031
        Tensioner Levers: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933115031
        Tensioner Pulleys: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933116031
        Timing Belt Kits: https://www.amazon.de/-/en/gp/bestsellers/automotive/8490902031
        Timing Belts: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504432031
        V-Drive Belts: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504431031
    Brakes: https://www.amazon.de/-/en/gp/bestsellers/automotive/79984031
        ABS & EBS Components & Accessories: https://www.amazon.de/-/en/gp/bestsellers/automotive/80010031
        Brake Shoes: https://www.amazon.de/-/en/gp/bestsellers/automotive/79987031
        Brake Kits: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933120031
        Brake Pads: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504242031
        Braking Force Regulator: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933121031
        Power Brake Systems: https://www.amazon.de/-/en/gp/bestsellers/automotive/79999031
        Brake Pipes: https://www.amazon.de/-/en/gp/bestsellers/automotive/79989031
        Brake Disc Rotors: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504243031
        Brake Hoses, Lines & Fittings: https://www.amazon.de/-/en/gp/bestsellers/automotive/79991031
            Fittings: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933119031
            Hoses: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933117031
            Lines: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933118031
        Sensors: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504462031
        Callipers & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/79990031
            Brake Calliper Tool Sets: https://www.amazon.de/-/en/gp/bestsellers/automotive/2502065031
            Calliper Bolts & Guide Pins: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933122031
            Calliper Brackets: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933123031
            Calliper Covers: https://www.amazon.de/-/en/gp/bestsellers/automotive/14439426031
            Calliper Pistons: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933124031
            Callipers: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933125031
        Brake Drums: https://www.amazon.de/-/en/gp/bestsellers/automotive/79992031
        Pneumatic Systems & Accessories: https://www.amazon.de/-/en/gp/bestsellers/automotive/79994031
        Handbrake Components: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933131031
            Handbrake Lever Return Springs: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933133031
            Handbrake Levers: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933134031
            Handbrake Shoes: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933135031
            Handbrake Systems: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933136031
            Parking Cables: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933132031
        Master Cylinders: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933130031
        Master Cylinder Repair Kits: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933129031
        Wheel Cylinders: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933142031
        Wheel Cylinder Repair Kits: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933141031
    Car Styling & Body Fittings: https://www.amazon.de/-/en/gp/bestsellers/automotive/80019031
    Drive & Transmission: https://www.amazon.de/-/en/gp/bestsellers/automotive/2503993031
        Automatic Gearboxes: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951722031
        Bearings & Bearing Kits: https://www.amazon.de/-/en/gp/bestsellers/automotive/300703031
        Clutches & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/81484031
        Drive Shafts & Accessories: https://www.amazon.de/-/en/gp/bestsellers/automotive/81494031
        Manual Gearboxes: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951723031
        Seals: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504006031
    Engine & Engine Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/81446031
        Chip Tuning: https://www.amazon.de/-/en/gp/bestsellers/automotive/667732031
        Cooling & Heating Systems: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951724031
        Engine Blocks: https://www.amazon.de/-/en/gp/bestsellers/automotive/81449031
        Engine Compartments: https://www.amazon.de/-/en/gp/bestsellers/automotive/81447031
        Engine Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951752031
            Camshafts & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951753031
            Connecting Rods & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951759031
                Bearings: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951760031
                Connecting Rods: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951762031
                Nuts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951763031
            Crankcase Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951764031
                Gasket Sets: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951765031
                Ventilation Hoses: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951766031
                Ventilation Valves: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951767031
            Crankshafts & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951768031
                Bearings: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951769031
                Gears: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951772031
                Pulleys: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951773031
                Seals: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951774031
            Cylinder Head Gaskets: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951775031
            Cylinder Heads: https://www.amazon.de/-/en/gp/bestsellers/automotive/81450031
            Engine Gasket Sets: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951776031
            Engine Mounts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951777031
            Main Bearings: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951778031
            Main Bolts & Studs: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951779031
            Oil Pans: https://www.amazon.de/-/en/gp/bestsellers/automotive/2502036031
            Oil Pumps: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951780031
            Pistons & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951781031
                Pistons: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951784031
                Rings & Ring Sets: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951786031
            Rocker Arms: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951787031
            Sensors: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951788031
                Air Mass: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504467031
                Barometric Pressure: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504463031
                Camshaft Position: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504468031
                Crankshaft Position: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504466031
                Detonation: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504464031
                MAP Sensors: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951791031
                Oil Level: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951792031
                Throttle Position: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504469031
            Timing Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951795031
            Turbochargers: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504420031
            Valves & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951796031
                Guides: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951799031
                Seals: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951801031
                Shims: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951803031
                Springs: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951804031
                Valves: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951805031
        Petrol Supply & Treatments: https://www.amazon.de/-/en/gp/bestsellers/automotive/81457031
    Exhaust & Exhaust Systems: https://www.amazon.de/-/en/gp/bestsellers/automotive/81460031
        Catalytic Converter Kits & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951833031
        Catalytic Converter Mounting Kits: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951834031
        Catalytic Converters: https://www.amazon.de/-/en/gp/bestsellers/automotive/81466031
        Clamps, Flanges & Hangers: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951835031
        Complete Installations: https://www.amazon.de/-/en/gp/bestsellers/automotive/81467031
        End Pipes & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4952563031
        Exhaust Gas Recirculation Cleaners & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/81481031
            Cleaners: https://www.amazon.de/-/en/gp/bestsellers/automotive/4952564031
            Valve Coolers: https://www.amazon.de/-/en/gp/bestsellers/automotive/4952565031
            Valve Gaskets: https://www.amazon.de/-/en/gp/bestsellers/automotive/4952566031
            Valves: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504131031
        Exhaust Manifolds & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4952572031
        Exhaust Systems: https://www.amazon.de/-/en/gp/bestsellers/automotive/81472031
        Extension Pipes: https://www.amazon.de/-/en/gp/bestsellers/automotive/4952568031
        Headers & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4952569031
            Bolts, Studs & Nuts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4952570031
            Headers: https://www.amazon.de/-/en/gp/bestsellers/automotive/81474031
        Intake Manifolds: https://www.amazon.de/-/en/gp/bestsellers/automotive/81468031
        Oxygen Sensors: https://www.amazon.de/-/en/gp/bestsellers/automotive/81469031
        Pipes: https://www.amazon.de/-/en/gp/bestsellers/automotive/81471031
        Seals: https://www.amazon.de/-/en/gp/bestsellers/automotive/81463031
        Silencer Mounting Kits: https://www.amazon.de/-/en/gp/bestsellers/automotive/4952576031
        Silencers: https://www.amazon.de/-/en/gp/bestsellers/automotive/81475031
    Filters: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504306031
        Air Filters: https://www.amazon.de/-/en/gp/bestsellers/automotive/81453031
            Cabin Air Filters: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958533031
            Engine Air Filters: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958534031
        Coolant Filters: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958535031
        Fuel Filters: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958536031
        Oil Filters: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504307031
        Service Kits: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958537031
        Transmission Filters: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958538031
    Fuses: https://www.amazon.de/-/en/gp/bestsellers/automotive/82508031
        Flat Fuses: https://www.amazon.de/-/en/gp/bestsellers/automotive/82509031
        Glass Fuses: https://www.amazon.de/-/en/gp/bestsellers/automotive/82510031
        Mini Fuses: https://www.amazon.de/-/en/gp/bestsellers/automotive/82511031
    Ignition & Tools: https://www.amazon.de/-/en/gp/bestsellers/automotive/81322031
        Glow Plugs: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504483031
        Ignition & Heating System: https://www.amazon.de/-/en/gp/bestsellers/automotive/81323031
        Ignition Cable: https://www.amazon.de/-/en/gp/bestsellers/automotive/81324031
        Spark Plug Connectors: https://www.amazon.de/-/en/gp/bestsellers/automotive/2502073031
        Spark Plug Wrenches: https://www.amazon.de/-/en/gp/bestsellers/automotive/2846389031
        Spark Plugs: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504494031
    Instruments: https://www.amazon.de/-/en/gp/bestsellers/automotive/81436031
        Clocks: https://www.amazon.de/-/en/gp/bestsellers/automotive/81444031
        Fuel Gauges: https://www.amazon.de/-/en/gp/bestsellers/automotive/81441031
        Instrument Panels: https://www.amazon.de/-/en/gp/bestsellers/automotive/81439031
        Nuts & Bolts: https://www.amazon.de/-/en/gp/bestsellers/automotive/81440031
        Oil Temperature Indicators: https://www.amazon.de/-/en/gp/bestsellers/automotive/81442031
        Rev Counters: https://www.amazon.de/-/en/gp/bestsellers/automotive/81437031
        Speedometers: https://www.amazon.de/-/en/gp/bestsellers/automotive/81438031
        Water Temperature Indicators: https://www.amazon.de/-/en/gp/bestsellers/automotive/81445031
    Interior Fittings: https://www.amazon.de/-/en/gp/bestsellers/automotive/81361031
    Lights, Bulbs & Indicators: https://www.amazon.de/-/en/gp/bestsellers/automotive/81326031
        HID Conversion Kits: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958714031
        Light Bulbs: https://www.amazon.de/-/en/gp/bestsellers/automotive/81345031
            Exterior Light Bulbs: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958715031
            Interior Light Bulbs: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958716031
            Special Bulbs: https://www.amazon.de/-/en/gp/bestsellers/automotive/81351031
            Universal Bulb Kits: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958720031
        Lighting Assemblies & Components: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958721031
            Angel Eyes: https://www.amazon.de/-/en/gp/bestsellers/automotive/81327031
            Daytime Running Lights: https://www.amazon.de/-/en/gp/bestsellers/automotive/81331031
            Fog Light Assemblies: https://www.amazon.de/-/en/gp/bestsellers/automotive/81338031
            Headlight Assemblies: https://www.amazon.de/-/en/gp/bestsellers/automotive/81336031
            Headlight Components & Accessories: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958722031
                Bezels: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958723031
                Brackets: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958724031
                Bulb Holders: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958725031
                Covers: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958726031
                Eyebrows: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958727031
                Housings: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958728031
                Lenses: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958729031
            Interior Lighting: https://www.amazon.de/-/en/gp/bestsellers/automotive/81332031
            Neon Spots & Bars: https://www.amazon.de/-/en/gp/bestsellers/automotive/81342031
            Number Plate Lights: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504238031
            Parking Lights: https://www.amazon.de/-/en/gp/bestsellers/automotive/81333031
            Rear Light Assemblies: https://www.amazon.de/-/en/gp/bestsellers/automotive/81330031
            Rear Light Components & Accessories: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958732031
                Lenses: https://www.amazon.de/-/en/gp/bestsellers/automotive/4958735031
            Side Lights: https://www.amazon.de/-/en/gp/bestsellers/automotive/81343031
            Third Brake Lights: https://www.amazon.de/-/en/gp/bestsellers/automotive/81335031
            Turn Signal Lights: https://www.amazon.de/-/en/gp/bestsellers/automotive/81328031
            Underbody Light Stripes & Kits: https://www.amazon.de/-/en/gp/bestsellers/automotive/81334031
    Sensors: https://www.amazon.de/-/en/gp/bestsellers/automotive/81502031
        Fuel Injection: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951829031
        Coolant Temperature: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504465031
            Engine Cooling & Heating: https://www.amazon.de/-/en/gp/bestsellers/automotive/81452031
            Compressors: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951737031
            Engine Fans: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951739031
            Expansion Valves: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951740031
            Fittings: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951741031
            Heater Cores: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951742031
            Intercoolers: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951743031
            Oil Pressure Switches: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964580031
            Radiators & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951745031
                Radiator Caps: https://www.amazon.de/-/en/gp/bestsellers/automotive/11416518031
                Radiator Fan Clutches: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951746031
                Radiator Fans: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951747031
                Radiator Hoses: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951748031
                Radiator Supports: https://www.amazon.de/-/en/gp/bestsellers/automotive/14301191031
                Radiators: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951749031
            Temperature Switches: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964585031
            Thermostats: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951750031
            Water Pumps: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951751031
        Ambient Air Temperature: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504471031
            AC & Interior Heating: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951725031
            Accumulators: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951726031
            Blower Motor Fan Resistors: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951727031
            Blower Motors: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951728031
            Condenser Fans: https://www.amazon.de/-/en/gp/bestsellers/automotive/5121980031
            Condensers & Secondary Radiators: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951729031
            Heat Exchangers: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951730031
            Heater Control Valves: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951731031
            Heater Hoses: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951732031
            Heater Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951733031
            Receiver Dryers: https://www.amazon.de/-/en/gp/bestsellers/automotive/4951735031
    Starters: https://www.amazon.de/-/en/gp/bestsellers/automotive/2503992031
    Steering & Suspension: https://www.amazon.de/-/en/gp/bestsellers/automotive/81353031
    Switches & Relays: https://www.amazon.de/-/en/gp/bestsellers/automotive/81498031
        Relays: https://www.amazon.de/-/en/gp/bestsellers/automotive/81499031
        Switches: https://www.amazon.de/-/en/gp/bestsellers/automotive/81501031
            Brake Light Switches: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964571031
            Push Button Switches: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964581031
            Blower: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964570031
            Interior Light: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964579031
            Toggle: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964587031
            Clutch: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964573031
            Air Pressure: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964569031
            Reverse Light Switches: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964583031
            Circuit Breaker: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964572031
            Door Jamb: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964574031
            Ignition Starter: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964578031
    Windscreen Wipers & Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/83086031
        Replacement Blades: https://www.amazon.de/-/en/gp/bestsellers/automotive/83089031
        Washer Pumps: https://www.amazon.de/-/en/gp/bestsellers/automotive/4964590031
        Windscreen Wipers: https://www.amazon.de/-/en/gp/bestsellers/automotive/83088031
        Wiper Motors: https://www.amazon.de/-/en/gp/bestsellers/automotive/83087031

หมายเหตุ

ลิงก์ด้านบนไม่มีส่วนต่อท้าย เช่น /ref=zg_bs_nav_auto_1_auto/262-0351978-4670353 ซึ่งดูเหมือนจะเป็นพารามิเตอร์ผู้อ้างอิง (referrer) บางประเภท แต่ลิงก์ก็ยังใช้งานได้ตามปกติ หากคุณต้องการลิงก์แบบเต็ม ให้ใส่ส่วนหัว User-Agent (สำหรับเว็บไซต์นี้ดูเหมือนว่าจะใส่ค่าอะไรก็ได้) ตัวอย่างเช่น:

# Fragment: shows only the extra line that sets a User-Agent on the session.
with requests.Session() as session:
    session.headers['User-Agent'] = 'my-app/0.0.1' # add this line: the header is then sent with every request made through this session

จากนั้นผลลัพธ์จะเป็นดังนี้ แต่ท้ายที่สุดแล้ว ฉันคิดว่าพารามิเตอร์ผู้อ้างอิงค่อนข้างไม่มีความหมายเมื่อพิจารณาถึงวิธีการดึงข้อมูลหน้าเว็บ:

Car Parts: https://www.amazon.de/-/en/gp/bestsellers/automotive/79919031/ref=zg_bs_nav_auto_1_auto/260-7159013-6357941
    Alternators: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504415031/ref=zg_bs_nav_auto_2_79919031
    Batteries & Accessories: https://www.amazon.de/-/en/gp/bestsellers/automotive/82523031/ref=zg_bs_nav_auto_2_79919031
        Battery Charging Units: https://www.amazon.de/-/en/gp/bestsellers/automotive/82526031/ref=zg_bs_nav_auto_3_82523031
        Battery Switches: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933113031/ref=zg_bs_nav_auto_3_82523031
        Battery Testers: https://www.amazon.de/-/en/gp/bestsellers/automotive/82527031/ref=zg_bs_nav_auto_3_82523031
        Car Batteries: https://www.amazon.de/-/en/gp/bestsellers/automotive/82524031/ref=zg_bs_nav_auto_3_82523031
        Jump Leads: https://www.amazon.de/-/en/gp/bestsellers/automotive/82525031/ref=zg_bs_nav_auto_3_82523031
    Belts & Tensioners: https://www.amazon.de/-/en/gp/bestsellers/automotive/2504430031/ref=zg_bs_nav_auto_2_79919031
        Idler Pulleys: https://www.amazon.de/-/en/gp/bestsellers/automotive/8490903031/ref=zg_bs_nav_auto_3_2504430031
        Serpentine Belts: https://www.amazon.de/-/en/gp/bestsellers/automotive/14343385031/ref=zg_bs_nav_auto_3_2504430031
        Tensioner Levers: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933115031/ref=zg_bs_nav_auto_3_2504430031
        Tensioner Pulleys: https://www.amazon.de/-/en/gp/bestsellers/automotive/4933116031/ref=zg_bs_nav_auto_3_2504430031
etc.
person Booboo    schedule 12.10.2020

ถ้าคุณตรวจสอบหมวด Automotive ในเบราว์เซอร์ คุณจะเห็นว่าลิงก์ย่อยจะไม่ถูกโหลดจนกว่าคุณจะเปิดลิงก์ในหน้า Landing Page ในกรณีนี้ เมื่อคุณเปิด Car Parts ลิงก์ย่อยของ Car Parts จึงจะถูกโหลด

คุณกำลังส่งคำขอเพียงคำขอเดียว ดังนั้นคำขอจะโหลดลิงก์ของหน้า Landing Page เท่านั้น คุณจะต้องวนคำขอผ่านลิงก์ของหน้า Landing Page เพื่อรับลิงก์ย่อยทั้งหมด

import requests
from bs4 import BeautifulSoup

# Bestsellers landing page of the Automotive category: the starting point of the crawl.
link = 'https://www.amazon.de/-/en/gp/bestsellers/automotive/ref=zg_bs_nav_0'

def fetch_all_links(link):
    """Yield the href of every sub-category link on the page at *link*.

    The page is downloaded through the module-level session ``s`` and parsed
    with html5lib. Links are taken from the ``<ul>`` that immediately follows
    the ``<li>`` containing ``span.zg_selected``. If nothing matches, the
    generator is simply empty.
    """
    r = s.get(link)
    soup = BeautifulSoup(r.text, "html5lib")
    # Yield the URLs one by one. Yielding the collected list as a single item
    # would hand the caller a list where it expects a URL string.
    for item in soup.select("li:has(> span.zg_selected) + ul > li > a[href]"):
        yield item.get("href")


def fetch_sub_links():
    """Yield the links found on each page linked from the landing page.

    The landing page is the module-level ``link``. Only one level below the
    landing page is visited; deeper levels are not followed.
    """
    # The loop variable must not be called `link`: assigning to that name
    # would make it local to this function and hide the module-level start
    # URL that is passed to fetch_all_links here.
    for landing_link in fetch_all_links(link):
        yield from fetch_all_links(landing_link)
    
if __name__ == '__main__':
    # The fetch functions read the session through the global name `s`, so
    # the session must be bound to exactly that name.
    with requests.Session() as s:
        # Send a browser-like User-Agent with every request on this session.
        s.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
        })
        for sub_link in fetch_sub_links():
            print(sub_link)

โปรดทราบว่าวิธีนี้อาจไม่ได้ลิงก์ย่อยทั้งหมด โดยจะได้เพียงสองระดับแรกเท่านั้น แต่ฉันหวังว่าคุณจะเข้าใจแนวทางแล้วว่าควรทำอย่างไร

person Assad Ali    schedule 13.10.2020

นี่เป็นวิธีหนึ่งที่คุณสามารถบรรลุเป้าหมายเดียวกันได้

import requests
from bs4 import BeautifulSoup

# Start URL handed to get_links in the __main__ block.
link = 'https://www.amazon.de/-/en/gp/bestsellers/automotive/ref=zg_bs_nav_0'
# Hrefs that get_links has already emitted, so a link that shows up on several pages is reported and crawled once.
unique_links = set()

def get_links(s, link):
    """Recursively yield every not-yet-seen category link reachable from *link*.

    The page at *link* is downloaded with session *s* and parsed with lxml.
    All new links of that page are yielded first; afterwards each of them is
    crawled in turn. Hrefs already present in the module-level
    ``unique_links`` set are skipped, and every emitted href is added to it.
    """
    markup = s.get(link).text
    tree = BeautifulSoup(markup, "lxml")
    pending = []  # new links of this page, crawled once the page is done
    for anchor in tree.select("li:has(> span.zg_selected) + ul > li > a[href]"):
        href = anchor.get("href")
        if href in unique_links:
            continue
        pending.append(href)
        yield from print_links(href)
        unique_links.add(href)

    for href in pending:
        yield from get_links(s, href)

def print_links(link):
    """Yield *link* exactly once, unchanged (despite the name, nothing is printed)."""
    yield from (link,)

if __name__ == '__main__':
    with requests.Session() as s:
        # Send a browser-like User-Agent with every request on this session.
        s.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
        })
        # Print each discovered link as soon as the crawler yields it.
        for found in get_links(s, link):
            print(found)
person MITHU    schedule 12.10.2020