Source code for scrapy_item_ingest.pipelines.main

"""
Main pipeline that combines items and requests functionality.
"""
import logging

from .items import ItemsPipeline
from .requests import RequestsPipeline

logger = logging.getLogger(__name__)


class DbInsertPipeline(ItemsPipeline, RequestsPipeline):
    """
    Combined pipeline covering both item handling and request tracking.

    Subclasses ItemsPipeline and RequestsPipeline and forwards each
    pipeline hook to the appropriate parent implementation.
    """

    def __init__(self, settings):
        """Run the initializer of each parent class with *settings*.

        The parents are initialized explicitly (ItemsPipeline first, then
        RequestsPipeline) rather than through ``super()``.
        """
        for parent in (ItemsPipeline, RequestsPipeline):
            parent.__init__(self, settings)

    @classmethod
    def from_crawler(cls, crawler):
        """Build a pipeline instance from *crawler*.

        Delegates to the function underlying RequestsPipeline.from_crawler,
        invoked with this class as ``cls``.
        """
        # NOTE(review): the original author chose RequestsPipeline's factory
        # "to get signal connections"; that wiring lives in RequestsPipeline
        # and is not visible in this module.
        build = RequestsPipeline.from_crawler.__func__
        return build(cls, crawler)

    def open_spider(self, spider):
        """Hook invoked when *spider* is opened; defers to the next class in the MRO."""
        inherited = super()
        inherited.open_spider(spider)

    def close_spider(self, spider):
        """Hook invoked when *spider* is closed; defers to the next class in the MRO."""
        inherited = super()
        inherited.close_spider(spider)

    def process_item(self, item, spider):
        """Hand *item* to ItemsPipeline.process_item and return its result."""
        outcome = ItemsPipeline.process_item(self, item, spider)
        return outcome