diff --git a/docs/topics/logging.rst b/docs/topics/logging.rst index c3445d40e9aa3e3b6b9d0fe56ac48927076190d3..00806392a672687b5889e8dfc8bf67cbe62c86a9 100644 --- a/docs/topics/logging.rst +++ b/docs/topics/logging.rst @@ -242,6 +242,47 @@ e.g. in the spider's ``__init__`` method:: If you run this spider again then INFO messages from ``scrapy.spidermiddlewares.httperror`` logger will be gone. +You can also filter log records by :class:`~logging.LogRecord` data. For +example, you can filter log records by message content using a substring or +a regular expression. Create a :class:`logging.Filter` subclass +and equip it with a regular expression pattern to +filter out unwanted messages:: + + import logging + import re + + class ContentFilter(logging.Filter): + def filter(self, record): + match = re.search(r'\d{3} [Ee]rror, retrying', record.getMessage()) + # Return False (drop) on a match; True (keep) otherwise. + return match is None + +A project-level filter may be attached to the root +handler created by Scrapy; this is a handy way to +filter all loggers in different parts of the project +(middlewares, spider, etc.):: + + import logging + import scrapy + + class MySpider(scrapy.Spider): + # ... + def __init__(self, *args, **kwargs): + for handler in logging.root.handlers: + handler.addFilter(ContentFilter()) + +Alternatively, you may choose a specific logger +and filter out its messages without affecting other loggers:: + + import logging + import scrapy + + class MySpider(scrapy.Spider): + # ... + def __init__(self, *args, **kwargs): + logger = logging.getLogger('my_logger') + logger.addFilter(ContentFilter()) + scrapy.utils.log module =======================