python+selenium+twitter+cookie

/ 0评 / 0

一、selenium获取

from selenium.webdriver.chrome import webdriver
from selenium.webdriver.chrome.options import Options
import chromedriver_autoinstaller

def init_driver(headless=True, proxy=None, show_images=False, option=None):
    """Create and return a configured Chrome WebDriver.

    Args:
        headless: When true, Chrome is started with ``--headless`` (no
            visible window) and ``--disable-gpu``.
        proxy: Optional proxy address, passed to Chrome as
            ``--proxy-server=<proxy>``.
        show_images: When false, image loading is blocked through the
            ``profile.managed_default_content_settings.images`` preference.
        option: Optional single extra Chrome command-line argument.

    Returns:
        The started driver, with its page-load timeout set to 100 seconds.
    """
    # Called for its side effect only (per the chromedriver_autoinstaller
    # README it fetches a matching chromedriver and adds it to PATH); the
    # returned path is not needed here.
    chromedriver_autoinstaller.install()

    options = Options()
    if headless:
        print("Scraping on headless mode.")
        options.add_argument('--disable-gpu')
        # The ``Options.headless`` setter is deprecated/removed in recent
        # Selenium 4 releases; the command-line switch works on every version.
        options.add_argument('--headless')
    options.add_argument('log-level=3')
    if proxy is not None:
        options.add_argument('--proxy-server=%s' % proxy)
        print("using proxy : ", proxy)
    if not show_images:
        prefs = {"profile.managed_default_content_settings.images": 2}
        options.add_experimental_option("prefs", prefs)
    if option is not None:
        options.add_argument(option)

    # ``webdriver`` is the module selenium.webdriver.chrome.webdriver, whose
    # driver class is named ``WebDriver`` (``Chrome`` is only an alias exported
    # by the top-level ``selenium.webdriver`` package).
    driver = webdriver.WebDriver(options=options)
    driver.set_page_load_timeout(100)
    return driver

二、cookie的获取

通过登录来获取cookie

import json
import os
import random
from pathlib import Path
from time import sleep

import dotenv
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
# NOTE: the original line here was a fused, truncated "import randomfrom Scweet.const";
# only "import random" is recoverable. get_email / get_password / get_username
# are defined further down in this file, so no Scweet.const import is required.

# Absolute path of the directory that contains this file.
current_dir = Path(__file__).parent.absolute()


def log_in(driver, env, timeout=20, wait=4):
    """Log in to Twitter through the web login flow.

    The e-mail, password and username are obtained from ``get_email``,
    ``get_password`` and ``get_username``, each called with ``env``.

    Args:
        driver: Selenium WebDriver used to drive the login page.
        env: Forwarded unchanged to the credential helper functions.
        timeout: Unused; kept so existing callers keep working.
        wait: Base delay in seconds. The page load is followed by a fixed
            ``wait`` second sleep; every other pause lasts a random time
            between ``wait`` and ``wait + 1`` seconds.

    Raises:
        NoSuchElementException: If the e-mail or password input is not found.
    """
    email = get_email(env)  # const.EMAIL
    password = get_password(env)  # const.PASSWORD
    username = get_username(env)  # const.USERNAME

    # "xpath" is the locator-strategy string behind selenium's By.XPATH.
    # find_element(by, value) exists in Selenium 3 and 4, whereas
    # find_element_by_xpath was removed in Selenium 4.3.
    by_xpath = "xpath"

    email_xpath = '//input[@autocomplete="username"]'
    password_xpath = '//input[@autocomplete="current-password"]'
    username_xpath = '//input[@data-testid="ocfEnterTextTextInput"]'

    def pause():
        # Randomised delay between UI actions.
        sleep(random.uniform(wait, wait + 1))

    driver.get('https://twitter.com/i/flow/login')
    sleep(wait)
    pause()

    # enter email
    email_el = driver.find_element(by_xpath, email_xpath)
    pause()
    email_el.send_keys(email)
    pause()
    email_el.send_keys(Keys.RETURN)
    pause()

    # in case twitter spotted unusual login activity : enter your username
    if check_exists_by_xpath(username_xpath, driver):
        username_el = driver.find_element(by_xpath, username_xpath)
        pause()
        # The original referenced an undefined name ``phone`` here, which
        # raised NameError whenever this extra prompt appeared.
        username_el.send_keys(username)
        pause()
        username_el.send_keys(Keys.RETURN)
        pause()

    # enter password
    password_el = driver.find_element(by_xpath, password_xpath)
    password_el.send_keys(password)
    pause()
    password_el.send_keys(Keys.RETURN)
    pause()


def check_exists_by_xpath(xpath, driver):
    """Report whether ``driver`` can currently locate an element by XPath.

    Args:
        xpath: XPath expression to look up.
        driver: Selenium WebDriver (or any object exposing a compatible
            ``find_element(by, value)`` method).

    Returns:
        True if the lookup succeeds, False if it raises
        ``NoSuchElementException``.
    """
    try:
        # "xpath" is the locator-strategy string behind selenium's By.XPATH;
        # find_element_by_xpath no longer exists in Selenium >= 4.3.
        driver.find_element("xpath", xpath)
    except NoSuchElementException:
        return False
    return True

def load_env_variable(key, default_value=None, none_allowed=False):
    """Read an environment variable and return its value.

    Args:
        key: Name of the environment variable.
        default_value: Value used when the variable is not set.
        none_allowed: When false, a resulting ``None`` is treated as an error.

    Returns:
        The variable's value, or ``default_value`` when it is not set.

    Raises:
        RuntimeError: If the result is ``None`` and ``none_allowed`` is false.
    """
    v = os.getenv(key, default=default_value)
    if v is None and not none_allowed:
        raise RuntimeError(f"{key} returned {v} but this is not allowed!")
    # The original fell off the end here and therefore always returned None.
    return v

def get_email(env):
    """Load ``env`` with dotenv, then look up ``SCWEET_EMAIL``.

    Returns whatever ``load_env_variable`` yields for that key; a missing
    value is permitted (``none_allowed=True``).
    """
    dotenv.load_dotenv(env, verbose=True)
    email = load_env_variable("SCWEET_EMAIL", none_allowed=True)
    return email


def get_password(env):
    """Load ``env`` with dotenv, then look up ``SCWEET_PASSWORD``.

    Returns whatever ``load_env_variable`` yields for that key; a missing
    value is permitted (``none_allowed=True``).
    """
    dotenv.load_dotenv(env, verbose=True)
    password = load_env_variable("SCWEET_PASSWORD", none_allowed=True)
    return password


def get_username(env):
    """Load ``env`` with dotenv, then look up ``SCWEET_USERNAME``.

    Returns whatever ``load_env_variable`` yields for that key; a missing
    value is permitted (``none_allowed=True``).
    """
    dotenv.load_dotenv(env, verbose=True)
    username = load_env_variable("SCWEET_USERNAME", none_allowed=True)
    return username

三、保存cookie

# Dump the driver's current cookies to cookies.json.
# (The ``with`` block was indented under a plain assignment in the original,
# which is an IndentationError; it belongs at the same level.)
savedCookies = driver.get_cookies()
with open('cookies.json', 'w') as f:
    json.dump(savedCookies, f)
  

四、读取cookie

# Parse the JSON content of cookies.json into ``cookies``.
with open('cookies.json', 'r') as cookie_file:
    cookies = json.loads(cookie_file.read())

五、设置cookie

# The login page has to be visited once more first; otherwise this fails with
# selenium.common.exceptions.InvalidCookieDomainException: Message: invalid cookie domain
driver.get('https://twitter.com/i/flow/login')
# Remove the cookies that are already there.
driver.delete_all_cookies()
# Add every loaded cookie to the driver.
for cookie in cookies:
    driver.add_cookie(cookie)
# Open the site again after the cookies have been added.
driver.get('https://twitter.com')