Я наваял простенький парсер для ютубовского чата
Там сумасшедший трафик, и что-то он у меня иногда падает
Не пойму, почему:
import json
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import ui
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.firefox.options import Options
def run_scraper():
options = Options()
#options.add_argument("--headless")
browser = webdriver.Firefox(options=options, service_log_path='./gecko.log')
wait = WebDriverWait(browser, 5)
id = 'heyOYoFCJAQ'
url = "https://www.youtube.com/live_chat?v=" + str(id)
browser.get(url)
browser.implicitly_wait(1)
#innerHTML = browser.execute_script("return document.body.innerHTML")
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[contains(@class, 'style-scope yt-button-renderer')]"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[contains(@class, 'style-scope ytd-menu-service-item-renderer')]"))).click()
chats = []
with open('youtube_chat.html', 'a') as file:
file.write('<html><head><META http-equiv=Content-Type content="text/html; charset=utf-8"></head><body><table>\n')
while True:
for chat in browser.find_elements_by_css_selector('yt-live-chat-text-message-renderer'):
try:
author_name = chat.find_element_by_css_selector("#author-name").text
print(author_name)
message = chat.find_element_by_css_selector("#message").text
#print(message)
_str = '<tr><td>%s</td> <td>%s</td></tr>\n' % (author_name, message)
file.write(_str)
except Exception as ex:
print('??? error:', ex)
browser.quit()
return chats
run_scraper()