/extract-emails

Extract emails from a given website

Primary language: Python · License: MIT

Extract Emails

PyPI version

Extract emails from a given website

Documentation

Requirements

  • Python >= 3.6
  • requests
  • selenium

Installation

pip install extract_emails

Usage

With default browsers

from extract_emails import EmailExtractor
from extract_emails.browsers import ChromeBrowser

START_URL = "http://www.tomatinos.com/"

# The extraction runs while the browser context is open; the collected
# results are consumed after the `with` block has exited.
with ChromeBrowser() as chrome:
    found = EmailExtractor(START_URL, chrome, depth=2).get_emails()

# Show each result twice: its default printed form, then its dict form.
for entry in found:
    print(entry)
    print(entry.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Same flow as above, with RequestsBrowser in place of ChromeBrowser.
from extract_emails import EmailExtractor
from extract_emails.browsers import RequestsBrowser

TARGET_SITE = "http://www.tomatinos.com/"

# Run the extraction inside the browser context, keep the results for later.
with RequestsBrowser() as session_browser:
    extractor = EmailExtractor(TARGET_SITE, session_browser, depth=2)
    results = extractor.get_emails()

# Print every result in its default form and as a dictionary.
for item in results:
    print(item)
    print(item.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}

With a custom browser

from extract_emails import EmailExtractor
from extract_emails.browsers import BrowserInterface

from selenium import webdriver
from selenium.webdriver.firefox.options import Options


class FirefoxBrowser(BrowserInterface):
    """Custom browser that fetches pages through a Selenium Firefox driver.

    Args:
        executable_path: Path to the geckodriver binary. The default is the
            location used in this example; pass your own path when the
            driver lives elsewhere.
    """

    def __init__(self, executable_path: str = "/home/di/geckodriver") -> None:
        ff_options = Options()
        # NOTE(review): newer Selenium releases expect the driver path via a
        # `Service` object instead of `executable_path` — confirm against the
        # Selenium version you have installed.
        self._driver = webdriver.Firefox(
            options=ff_options, executable_path=executable_path,
        )

    def close(self) -> None:
        """Quit the underlying Firefox driver."""
        self._driver.quit()

    def get_page_source(self, url: str) -> str:
        """Load *url* in the driver and return the resulting page source."""
        self._driver.get(url)
        return self._driver.page_source


# Use the custom browser exactly like the built-in ones: extract inside the
# context, then work with the results once the block has exited.
with FirefoxBrowser() as firefox:
    collected = EmailExtractor("http://www.tomatinos.com/", firefox, depth=2).get_emails()

# Print every result in its default form and as a dictionary.
for record in collected:
    print(record)
    print(record.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}