An open source and collaborative framework for extracting the data you need from websites.
In a fast, simple, yet extensible way.
Maintained by Zyte and many other contributors
class BlogSpider(scrapy.Spider):
    """Crawl the Zyte blog and yield one item per post title.

    Starts at the blog index page, emits ``{'title': ...}`` dicts for every
    post title found, and follows pagination links to subsequent pages.
    """

    name = 'blogspider'
    start_urls = ['https://www.zyte.com/blog/']

    def parse(self, response):
        """Extract post titles from *response*, then schedule the next page.

        Yields a dict per matched title element, followed by follow-up
        requests for each "next page" link, handled by this same callback.
        """
        # One selector match per blog-post heading; pull out its text node.
        for heading in response.css('.oxy-post-title'):
            yield {'title': heading.css('::text').get()}

        # Pagination: queue a request for each "next" anchor on the page.
        for pagination_link in response.css('a.next'):
            yield response.follow(pagination_link, self.parse)
EOF
scrapy runspider myspider.py
shub schedule blogspider
shub items 26731/1/8
{"title": "Improved Frontera: Web Crawling at Scale with Python 3 Support"}
{"title": "How to Crawl the Web Politely with Scrapy"}