Zillow에 접속하여 관심 있는 지역 월세 매물의 가격, 주소, 링크를 수집하고, 이를 구글 폼으로 전송해 스프레드시트에 정리하는 자동화 프로젝트를 진행해 보았다.
원래는 한국사이트 직방, 다방으로 접근하고자 했으나, 데이터 접근이 너무 번거로워 ZILLOW로 대체하였다.
최종 결과
import time
from bs4 import BeautifulSoup
import requests
import lxml
# Fetch the Zillow rental search results page and parse it.
# The URL is stored as a plain string: the angle brackets that used to wrap it
# were markup residue and made it an invalid URL for requests.
ZILLOW = "https://www.zillow.com/homes/for_rent/1-_beds/?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22mapBounds%22%3A%7B%22west%22%3A-122.58833922338867%2C%22east%22%3A-122.27831877661133%2C%22south%22%3A37.66883833297286%2C%22north%22%3A37.88159151307567%7D%2C%22mapZoom%22%3A12%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22price%22%3A%7B%22max%22%3A872627%7D%2C%22beds%22%3A%7B%22min%22%3A1%7D%2C%22fore%22%3A%7B%22value%22%3Afalse%7D%2C%22mp%22%3A%7B%22max%22%3A3000%7D%2C%22auc%22%3A%7B%22value%22%3Afalse%7D%2C%22nc%22%3A%7B%22value%22%3Afalse%7D%2C%22fr%22%3A%7B%22value%22%3Atrue%7D%2C%22fsbo%22%3A%7B%22value%22%3Afalse%7D%2C%22cmsn%22%3A%7B%22value%22%3Afalse%7D%2C%22fsba%22%3A%7B%22value%22%3Afalse%7D%7D%2C%22isListVisible%22%3Atrue%7D"

# Browser-like request headers (presumably so the site serves the regular
# listing page instead of rejecting the default client -- verify if scraping
# starts returning empty results).
header = {
    "Accept-Language": 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36",
}

# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops early on an HTTP error instead of silently parsing
# an error page into empty lists.
response = requests.get(ZILLOW, headers=header, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "lxml")
# Build the address list.
# Each address text is split on " | "; when the separator is present the
# second piece is kept, otherwise the whole (single) piece is kept.
all_address = soup.find_all(name="address", class_="list-card-addr")
address_list = [
    pieces[1] if len(pieces) > 1 else pieces[0]
    for pieces in (tag.getText().split(" | ") for tag in all_address)
]
# Build the price list.
# Keep only the text before the first "+" and drop the "/mo" suffix.
all_prices = soup.find_all(name="div", class_="list-card-price")
price_list = [
    tag.getText().partition("+")[0].replace("/mo", "")
    for tag in all_prices
]
# Build the link list.
from urllib.parse import urljoin

all_links = soup.select(".list-card-top a")
# urljoin() leaves absolute hrefs untouched and resolves relative ones against
# the Zillow origin. The previous hand-built prefix contained literal angle
# brackets ("<https://www.zillow.com>"), which produced unusable links.
link_list = [urljoin("https://www.zillow.com", anchor["href"]) for anchor in all_links]
# Open the Google Form with Selenium and submit one response per listing.
from selenium import webdriver
from selenium.webdriver.common.by import By

CHROME_DRIVER_PATH = "/Users/zzang/Desktop/chromedriver_win32/chromedriver.exe"
# Plain URL: the angle brackets that used to wrap it were markup residue and
# are not part of the address.
GOOGLE_FORM = "https://forms.gle/9nf7XFoYNcLZX5HYA"

# XPaths of the three text inputs (address, price, link) and the submit button.
ADDRESS_INPUT_XPATH = '//*[@id="mG61Hd"]/div[2]/div/div[2]/div[1]/div/div/div[2]/div/div[1]/div/div[1]/input'
PRICE_INPUT_XPATH = '//*[@id="mG61Hd"]/div[2]/div/div[2]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/input'
LINK_INPUT_XPATH = '//*[@id="mG61Hd"]/div[2]/div/div[2]/div[3]/div/div/div[2]/div/div[1]/div/div[1]/input'
SUBMIT_BUTTON_XPATH = '//*[@id="mG61Hd"]/div[2]/div/div[3]/div[1]/div[1]/div/span/span'

# NOTE(review): newer Selenium 4 releases expect a Service object instead of
# executable_path -- confirm against the installed Selenium version.
driver = webdriver.Chrome(executable_path=CHROME_DRIVER_PATH)

# zip() stops at the shortest list, so a listing that is missing one of its
# fields cannot raise an IndexError halfway through the submissions.
for listing_address, listing_price, listing_link in zip(address_list, price_list, link_list):
    driver.get(GOOGLE_FORM)
    time.sleep(3)  # give the form time to render before locating its inputs

    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which is no
    # longer available in current Selenium releases.
    driver.find_element(By.XPATH, ADDRESS_INPUT_XPATH).send_keys(listing_address)
    driver.find_element(By.XPATH, PRICE_INPUT_XPATH).send_keys(listing_price)
    driver.find_element(By.XPATH, LINK_INPUT_XPATH).send_keys(listing_link)
    driver.find_element(By.XPATH, SUBMIT_BUTTON_XPATH).click()

# Keep the script (and therefore the browser window) alive after the last
# submission. Sleeping instead of spinning on `pass` avoids pinning a CPU core.
while True:
    time.sleep(60)