Examples

A few examples of what you can do with Cabu.

Simple example

@app.route('/gizmodo_last_articles_links')
def gizmodo_last_articles():
    """Return, as JSON, the article links found on the Gizmodo front page.

    Loads http://www.gizmodo.com in the app's webdriver and collects the
    ``href`` attribute of every anchor matching ``h1.headline>a``.
    """
    app.webdriver.get('http://www.gizmodo.com')

    headline_anchors = app.webdriver.find_elements_by_css_selector('h1.headline>a')
    links = [anchor.get_attribute('href') for anchor in headline_anchors]

    payload = {
        'message': 'Last articles',
        'articles': links,
    }
    return jsonify(payload)

Persistence

You can persist what you crawl to a database.

@app.route('/store_gizmodo_last_articles_links')
def store_gizmodo_last_articles():
    """Crawl the Gizmodo front page and persist the headline links.

    Loads http://www.gizmodo.com in the app's webdriver, collects the
    ``href`` attribute of every anchor matching ``h1.headline>a`` and stores
    one document per link in the ``links`` collection of ``app.db``.

    Returns:
        A JSON response with a message and the number of links inserted.
    """
    app.webdriver.get('http://www.gizmodo.com')
    headline_anchors = app.webdriver.find_elements_by_css_selector('h1.headline>a')
    articles_links = [anchor.get_attribute('href') for anchor in headline_anchors]

    # NOTE(review): assumes app.db is a PyMongo database handle -- confirm.
    # PyMongo spells the bulk insert `insert_many` (`insertMany` is the
    # mongo-shell name) and only accepts mapping documents, so each link is
    # wrapped in a small document instead of being passed as a bare string.
    documents = [{'url': link} for link in articles_links]

    # insert_many() rejects an empty list, so skip the call when the
    # selector matched nothing.
    if documents:
        app.db.links.insert_many(documents)

    return jsonify({
        'message': 'Last articles from Gizmodo',
        'nb_articles_inserted': len(documents),
    })

More complex scenario

@app.route('/cheapest_flight')
def cheapest_flight():
    """Run a fixed Expedia flight search and return the first result as JSON.

    Fills Expedia's flight form with a New York -> Paris round trip
    (06/01/2016 to 09/15/2016), submits it, waits for the page to change and
    reads the departure time, arrival time, airline and price. The last
    three are read from ``#flightModule1``, which the route name presents as
    the cheapest flight.

    Returns:
        A JSON response with a status message, the scraped ``flight`` fields
        (possibly partial when an element is missing) and the URL the
        webdriver ended up on.
    """
    browser = app.webdriver
    browser.get('https://www.expedia.com')

    # Keep a handle on the landing page's root element: once it goes stale
    # we know the browser has left the landing page.
    landing_root = browser.find_element_by_tag_name('html')
    browser.find_element_by_id('tab-flight-tab').click()

    # Look up the form controls, in the same order the form is filled.
    form_ids = (
        'flight-origin',
        'flight-destination',
        'flight-departing',
        'flight-returning',
        'search-button',
    )
    origin_box, destination_box, departing_box, returning_box, submit = [
        browser.find_element_by_id(element_id) for element_id in form_ids
    ]

    # Fill in the itinerary and submit the search.
    origin_box.send_keys('New York, NY (NYC-All Airports)')
    destination_box.send_keys('Paris, France (PAR-All Airports)')
    returning_box.clear()

    departing_box.send_keys('06/01/2016')
    returning_box.click()
    # Cleared again after focusing the field, right before typing the date.
    returning_box.clear()
    returning_box.send_keys('09/15/2016')
    submit.click()

    # Block for up to 60 seconds until the landing page has been replaced.
    WebDriverWait(browser, 60).until(staleness_of(landing_root))

    # (result key, CSS selector, whether an 'm' is appended to the text).
    # NOTE(review): the 'm' suffix presumably completes an abbreviated
    # am/pm marker such as "6:30p" -- confirm against the live page.
    scraped_fields = (
        ('departure_time', 'span.departure-time', True),
        ('arrival_time', '#flightModule1 span.arrival-time', True),
        ('airline', '#flightModule1 div.truncate', False),
        ('price', '#flightModule1 .dollars', False),
    )

    flight = {}
    try:
        for key, selector, append_m in scraped_fields:
            text = browser.find_element_by_css_selector(selector).text
            flight[key] = text + 'm' if append_m else text
    except NoSuchElementException:
        # Fields scraped before the failure stay in `flight`.
        message = 'No results :('
    else:
        message = 'Results retrieved !'

    response_body = {
        'message': message,
        'flight': flight,
        'search_url': browser.current_url
    }
    return jsonify(response_body)