Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
xxxfzxxx authored Nov 18, 2022
1 parent 599cc09 commit 817d6cf
Show file tree
Hide file tree
Showing 2 changed files with 14,733 additions and 0 deletions.
65 changes: 65 additions & 0 deletions src/backend/database/scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from bs4 import BeautifulSoup
import re
import sqlite3

# Module-level SQLite connection shared by scraper(), writeToAptDB() and
# writeToImgDB(); opened as a side effect of loading this module.
con = sqlite3.connect("database_prod.db")


def scraper():
    """Import apartment listings from the saved ``uiucApts.htm`` page.

    Every ``div.c-list`` element in the page is treated as one apartment and
    its position in the page is used as the ``apt_id``. For each apartment one
    row is written through ``writeToAptDB`` and one row per carousel image
    through ``writeToImgDB``. All inserts are committed together once the
    whole page has been processed.
    """
    with open("uiucApts.htm", "r") as f:
        content = f.read()

    # The file handle is no longer needed once the markup is in memory.
    soup = BeautifulSoup(content, features="html.parser")
    apts = soup.find_all("div", class_="c-list")

    for i, apt in enumerate(apts):
        mobileHeader = apt.find("div", class_="mobileHeader")
        imageSection = apt.find("div", class_="slick-track")
        campusDetail = apt.find("div", class_="campusDetail")

        title_link = mobileHeader.find("a")
        apt_name = title_link.text
        apt_url = title_link["href"]
        apt_addr = mobileHeader.find("span", class_="ellipsis").text
        price_range = campusDetail.find("em", class_="rent_style").text.strip()
        apt_price_min, apt_price_max = format_price(price_range)

        writeToAptDB(i, apt_name, apt_addr, apt_price_min, apt_price_max, apt_url)

        if imageSection is None:
            continue  # listing has no image carousel

        for idx, img in enumerate(imageSection.find_all("img")):
            if idx == 0:
                # The first slide carries its URL directly in ``src``.
                img_url = img.get("src")
            else:
                # Later slides keep the URL in ``data-lazy``; fall back to
                # ``src`` when that attribute is absent instead of raising
                # KeyError.
                img_url = img.get("data-lazy") or img.get("src")
            if img_url:
                writeToImgDB(i, img_url)

    con.commit()


def format_price(price_range):
    """Parse a rent string into a ``(min_price, max_price)`` pair of ints.

    Args:
        price_range: Text of the rent element, e.g. ``"$500 - $700"``,
            ``"$1,200.00 - $1,500.00"`` or ``"$650"``.

    Returns:
        A 2-tuple ``(min_price, max_price)``. A single price is returned as
        both bounds. ``(-1, -1)`` is returned when the text contains letters
        (e.g. "Call for price") or no number at all. Thousands separators are
        removed and decimal parts are truncated.
    """
    # Any letter means the listing shows text rather than a numeric price.
    if re.search(r"[a-zA-Z]", price_range):
        return -1, -1  # price not available

    # Match whole amounts so "1,200" and "499.99" each stay one number rather
    # than being split at the comma or the decimal point.
    amounts = re.findall(r"\d+(?:,\d{3})*(?:\.\d+)?", price_range)
    if not amounts:
        return -1, -1  # nothing numeric to parse

    prices = [int(float(amount.replace(",", ""))) for amount in amounts]
    if len(prices) == 1:
        return prices[0], prices[0]  # only has one price
    return prices[0], prices[1]


def writeToAptDB(id, name, addr, pmin, pmax, url):
    """Insert one apartment row into the ``Apartments`` table.

    The statement is executed on the module-level connection ``con`` but not
    committed here; committing is left to the caller.
    """
    insert_sql = "INSERT INTO Apartments (apt_id, apt_name, apt_address, price_min, price_max, link) \
VALUES (?, ?, ?, ?, ?, ?)"
    row = (id, name, addr, pmin, pmax, url)
    cursor = con.cursor()
    cursor.execute(insert_sql, row)


def writeToImgDB(id, url):
    """Insert one picture link for apartment ``id`` into the ``AptPics`` table.

    The statement is executed on the module-level connection ``con`` but not
    committed here; committing is left to the caller.
    """
    insert_sql = "INSERT INTO AptPics (apt_id, link) \
VALUES (?, ?)"
    row = (id, url)
    cursor = con.cursor()
    cursor.execute(insert_sql, row)


# Run the import as soon as this module is loaded, then report completion.
# The message is only printed if scraper() returned without raising.
scraper()
print("Import to DB successfully.")
Loading

3 comments on commit 817d6cf

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
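| File | Stmts | Miss | Cover | Missing |
|---|---|---|---|---|
| app.py | 131 | 0 | 100% | |
| config.py | 1 | 0 | 100% | |
| decorators.py | 27 | 0 | 100% | |
| dataholders | | | | |
|    apt.py | 9 | 0 | 100% | |
|    mainpage_get.py | 15 | 0 | 100% | |
|    review.py | 7 | 0 | 100% | |
|    user.py | 8 | 0 | 100% | |
| pages | | | | |
|    login.py | 35 | 0 | 100% | |
|    mainpage.py | 100 | 0 | 100% | |
|    userpage.py | 50 | 6 | 88% | 37, 51, 83–91 |
| TOTAL | 383 | 6 | 98% | |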

Tests Skipped Failures Errors Time
43 0 💤 0 ❌ 0 🔥 0.942s ⏱️

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
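| File | Stmts | Miss | Cover | Missing |
|---|---|---|---|---|
| app.py | 131 | 0 | 100% | |
| config.py | 1 | 0 | 100% | |
| decorators.py | 27 | 0 | 100% | |
| dataholders | | | | |
|    apt.py | 9 | 0 | 100% | |
|    mainpage_get.py | 15 | 0 | 100% | |
|    review.py | 7 | 0 | 100% | |
|    user.py | 8 | 0 | 100% | |
| pages | | | | |
|    login.py | 35 | 0 | 100% | |
|    mainpage.py | 100 | 0 | 100% | |
|    userpage.py | 50 | 6 | 88% | 37, 51, 83–91 |
| TOTAL | 383 | 6 | 98% | |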

Tests Skipped Failures Errors Time
43 0 💤 0 ❌ 0 🔥 1.065s ⏱️

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
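| File | Stmts | Miss | Cover | Missing |
|---|---|---|---|---|
| app.py | 131 | 0 | 100% | |
| config.py | 1 | 0 | 100% | |
| decorators.py | 27 | 0 | 100% | |
| dataholders | | | | |
|    apt.py | 9 | 0 | 100% | |
|    mainpage_get.py | 15 | 0 | 100% | |
|    review.py | 7 | 0 | 100% | |
|    user.py | 8 | 0 | 100% | |
| pages | | | | |
|    login.py | 35 | 0 | 100% | |
|    mainpage.py | 100 | 0 | 100% | |
|    userpage.py | 50 | 6 | 88% | 37, 51, 83–91 |
| TOTAL | 383 | 6 | 98% | |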

Tests Skipped Failures Errors Time
43 0 💤 0 ❌ 0 🔥 0.880s ⏱️

Please sign in to comment.