一、selenium获取
from selenium.webdriver.chrome import webdriver
from selenium.webdriver.chrome.options import Options
import chromedriver_autoinstaller
def init_driver(headless=True, proxy=None, show_images=False, option=None):
    """Create and configure a Chrome WebDriver instance.

    Args:
        headless: When exactly ``True``, Chrome is started with ``--headless``
            and ``--disable-gpu``.
        proxy: Optional proxy address, passed to Chrome as ``--proxy-server``.
        show_images: When ``False``, image loading is blocked through the
            ``profile.managed_default_content_settings.images`` preference.
        option: Optional extra command-line argument forwarded to Chrome.

    Returns:
        The Chrome driver, with its page-load timeout set to 100 seconds.
    """
    # Imported locally: the name ``webdriver`` bound at the top of this file is
    # the module selenium.webdriver.chrome.webdriver, which exposes
    # ``WebDriver`` but has no ``Chrome`` attribute.
    from selenium.webdriver import Chrome

    # Called for its side effect (making a chromedriver binary available);
    # the returned path is not needed here.
    chromedriver_autoinstaller.install()

    options = Options()
    if headless is True:
        print("Scraping on headless mode.")
        options.add_argument('--disable-gpu')
        # Plain argument instead of the ``options.headless`` setter, which is
        # deprecated/removed in recent Selenium 4 releases.
        options.add_argument('--headless')
    options.add_argument('log-level=3')

    if proxy is not None:
        options.add_argument('--proxy-server=%s' % proxy)
        print("using proxy : ", proxy)

    if show_images == False:  # noqa: E712 - keep the original comparison semantics
        prefs = {"profile.managed_default_content_settings.images": 2}
        options.add_experimental_option("prefs", prefs)

    if option is not None:
        options.add_argument(option)

    driver = Chrome(options=options)
    driver.set_page_load_timeout(100)
    return driver
二、cookie的获取
通过登录来获取cookie
import dotenv
import os
from pathlib import Path
from time import sleep
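import random
# NOTE(review): the source line read "import randomfrom Scweet.const" - two import statements were fused and the second one is truncated, so the names it imported from Scweet.const cannot be recovered here.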
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
import json
# Absolute path of the directory that contains this file.
current_dir = Path(__file__).parent.absolute()
def log_in(driver, env, timeout=20, wait=4):
    """Log in to Twitter through the web login flow.

    The e-mail, password and username are obtained from ``get_email``,
    ``get_password`` and ``get_username``, each called with ``env``.

    Args:
        driver: Selenium WebDriver used to drive the browser.
        env: Value handed to the ``get_*`` helpers (they pass it to
            ``dotenv.load_dotenv``).
        timeout: Unused; kept so existing callers that pass it keep working.
        wait: Base delay in seconds. The first pause lasts exactly ``wait``;
            every later pause lasts a random time in ``[wait, wait + 1]``.

    Raises:
        NoSuchElementException: Propagated from ``find_element`` when the
            e-mail or password input cannot be located.
    """
    def pause():
        # Randomised delay between UI actions.
        sleep(random.uniform(wait, wait + 1))

    email = get_email(env)
    password = get_password(env)
    username = get_username(env)

    driver.get('https://twitter.com/i/flow/login')
    sleep(wait)

    email_xpath = '//input[@autocomplete="username"]'
    password_xpath = '//input[@autocomplete="current-password"]'
    username_xpath = '//input[@data-testid="ocfEnterTextTextInput"]'
    pause()

    # "xpath" is the value of selenium's By.XPATH; find_element(by, value)
    # replaces find_element_by_xpath, which was removed in Selenium 4.3.
    # Enter the e-mail.
    email_el = driver.find_element("xpath", email_xpath)
    pause()
    email_el.send_keys(email)
    pause()
    email_el.send_keys(Keys.RETURN)
    pause()

    # In case Twitter spotted unusual login activity: enter the username.
    if check_exists_by_xpath(username_xpath, driver):
        username_el = driver.find_element("xpath", username_xpath)
        pause()
        # Previously sent the undefined name ``phone`` (NameError).
        username_el.send_keys(username)
        pause()
        username_el.send_keys(Keys.RETURN)
        pause()

    # Enter the password.
    password_el = driver.find_element("xpath", password_xpath)
    password_el.send_keys(password)
    pause()
    password_el.send_keys(Keys.RETURN)
    pause()
def check_exists_by_xpath(xpath, driver):
    """Return whether ``driver`` can currently locate an element by ``xpath``.

    Args:
        xpath: XPath expression to look up.
        driver: Selenium WebDriver (anything exposing ``find_element``).

    Returns:
        ``True`` if ``find_element`` succeeds, ``False`` if it raises
        ``NoSuchElementException``.
    """
    try:
        # "xpath" is the value of selenium's By.XPATH; find_element(by, value)
        # replaces find_element_by_xpath, which was removed in Selenium 4.3.
        driver.find_element("xpath", xpath)
    except NoSuchElementException:
        return False
    return True
def load_env_variable(key, default_value=None, none_allowed=False):
    """Read environment variable ``key`` and return its value.

    Args:
        key: Name of the environment variable.
        default_value: Value used when the variable is not set.
        none_allowed: When false, a resulting ``None`` is treated as an error.

    Returns:
        The variable's value, or ``default_value`` when it is unset.

    Raises:
        RuntimeError: If the result is ``None`` and ``none_allowed`` is false.
    """
    v = os.getenv(key, default=default_value)
    if v is None and not none_allowed:
        raise RuntimeError(f"{key} returned {v} but this is not allowed!")
    # The value was previously computed but never returned, so every caller
    # received None.
    return v
def get_email(env):
    """Load the dotenv source ``env`` and look up ``SCWEET_EMAIL``.

    Returns whatever ``load_env_variable`` yields for that key; the lookup is
    made with ``none_allowed=True``.
    """
    dotenv.load_dotenv(env, verbose=True)
    email = load_env_variable("SCWEET_EMAIL", none_allowed=True)
    return email
def get_password(env):
    """Load the dotenv source ``env`` and look up ``SCWEET_PASSWORD``.

    Returns whatever ``load_env_variable`` yields for that key; the lookup is
    made with ``none_allowed=True``.
    """
    dotenv.load_dotenv(env, verbose=True)
    password = load_env_variable("SCWEET_PASSWORD", none_allowed=True)
    return password
def get_username(env):
    """Load the dotenv source ``env`` and look up ``SCWEET_USERNAME``.

    Returns whatever ``load_env_variable`` yields for that key; the lookup is
    made with ``none_allowed=True``.
    """
    dotenv.load_dotenv(env, verbose=True)
    username = load_env_variable("SCWEET_USERNAME", none_allowed=True)
    return username
三、保存cookie
# Fetch the cookies of the current browser session and store them as JSON
# in cookies.json.
savedCookies = driver.get_cookies()
with open('cookies.json', 'w') as f:
    f.write(json.dumps(savedCookies))
四、读取cookie
# Read the JSON-encoded cookies back from cookies.json.
with open('cookies.json', 'r') as f:
    cookies = json.loads(f.read())
五、设置cookie
# The login page must be visited again first; otherwise an error is raised:
# selenium.common.exceptions.InvalidCookieDomainException: Message: invalid cookie domain
driver.get('https://twitter.com/i/flow/login')
# Delete the existing cookies.
driver.delete_all_cookies()
# Add each cookie that was loaded from cookies.json.
for cookie in cookies:
    driver.add_cookie(cookie)
# Open the site again now that the saved cookies have been added.
driver.get('https://twitter.com')