import os
import csv
import json
import requests
from bs4 import BeautifulSoup
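
# Scrape reader comments from GoComics listing pages, saving each comic's
# comments to CSV and JSON files under a per-category output directory.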
def get_comments(comic_name, comic_url, output_dir):
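    """Fetch a comic page and write its comments and replies to CSV and JSON."""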
    try:
        # A timeout keeps a stalled request from hanging the scraper.
        response = requests.get(comic_url, timeout=30)
        soup = BeautifulSoup(response.content, 'html.parser')
        comment_bodies = soup.find_all("div", class_="comment-body")
        reply_bodies = soup.find_all("div", class_="media-body ml-3")
        if not comment_bodies:
            print(f"No comments found for {comic_name}.")
            return
        if not reply_bodies:
            # Replies are optional: log it, but still save the comments below.
            print(f"No replies found for {comic_name}.")
        comic_dir = output_dir
        os.makedirs(comic_dir, exist_ok=True)
        csv_file = os.path.join(comic_dir, f"{comic_name}_comments.csv")
        with open(csv_file, mode='w', newline='', encoding='utf-8') as csvfile:
            csv_writer = csv.writer(csvfile)
            # Write comments first, then replies, each under its own header row.
            csv_writer.writerow(['Comment'])
            for comment_body in comment_bodies:
                comment_text_elem = comment_body.find("p")
                if comment_text_elem:
                    comment_text = comment_text_elem.get_text(strip=True)
                    csv_writer.writerow([comment_text])
            csv_writer.writerow(['Replies'])
            for reply_body in reply_bodies:
                reply_text_elem = reply_body.find("p")
                if reply_text_elem:
                    reply_text = reply_text_elem.get_text(strip=True)
                    csv_writer.writerow([reply_text])
        json_file = os.path.join(comic_dir, f"{comic_name}_comments.json")
        with open(json_file, mode='w', encoding='utf-8') as jsonfile:
            # The JSON output mirrors the comment column of the CSV.
            comments = []
            for comment_body in comment_bodies:
                comment_text_elem = comment_body.find("p")
                if comment_text_elem:
                    comments.append(comment_text_elem.get_text(strip=True))
            json.dump(comments, jsonfile, ensure_ascii=False, indent=4)
    except Exception as e:
        print(f"Error scraping {comic_name}: {e}")
def get_comic_comments(comic_name, comic_url, output_dir, page_number):
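    """Build the full comments URL for a comic and hand it to get_comments.

    page_number is accepted for symmetry with the caller but is currently unused.
    """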
    try:
        base_url = "https://www.gocomics.com"
        full_comic_url = f"{base_url}{comic_url}?comments=visible#comments"
        print(f"Scraping comments for {comic_name}")
        get_comments(comic_name, full_comic_url, output_dir)
    except Exception as e:
        print(f"Error: {e}")
def scrape_comics(category, output_dir):
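    """Walk a category's listing pages and scrape comments for every comic found."""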
    page_num = 1
    base_url = ""
    if category == "Trending":
        base_url = "https://www.gocomics.com/comics/trending"
    elif category == "Political":
        base_url = "https://www.gocomics.com/comics/political"
    elif category == "Web Comics":
        base_url = "https://www.gocomics.com/comics/web-comics"
    elif category == "Popular":
        base_url = "https://www.gocomics.com/comics/popular"
    elif category == "A-to-Z":
        base_url = "https://www.gocomics.com/comics/a-to-z"
    else:
        print("Invalid category.")
        return
    category_dir = os.path.join(output_dir, category)
    os.makedirs(category_dir, exist_ok=True)
    while True:
        page_url = f"{base_url}?page={page_num}"
        print(f"Scraping page {page_num} for {category}: {page_url}")
        try:
            response = requests.get(page_url, timeout=30)
            soup = BeautifulSoup(response.content, 'html.parser')
            comic_links = soup.find_all("a", class_="gc-blended-link")
            if not comic_links:
                break
            for link in comic_links:
                comic_name = link.get_text(strip=True)
                comic_url = link['href']
                comic_output_dir = os.path.join(category_dir, str(page_num), comic_name)
                get_comic_comments(comic_name, comic_url, comic_output_dir, page_num)
            # Stop once the listing no longer offers a "Next" button.
            next_button = soup.find("a", class_="btn btn-primary gc-button", string="Next ")
            if not next_button:
                break
            page_num += 1
        except Exception as e:
            print(f"Error: {e}")
            break
output_dir = "data"
while True:
print("\nMenu:")
print("1. Trending")
print("2. Political")
print("3. Web Comics")
print("4. Popular")
print("5. A-to-Z")
print("q. Quit")
choice = input("Enter your choice: ")
if choice == "1":
scrape_comics("Trending", output_dir)
elif choice == "2":
scrape_comics("Political", output_dir)
elif choice == "3":
scrape_comics("Web Comics", output_dir)
elif choice == "4":
scrape_comics("Popular", output_dir)
elif choice == "5":
scrape_comics("A-to-Z", output_dir)
elif choice.lower() == "q":
print("Exiting program.")
break
else:
print("Invalid choice. Please try again.")