yusufenes committed on
Commit
fd483b3
·
verified ·
1 Parent(s): cc603c2

Delete get_real_home_listing.py

Browse files
Files changed (1) hide show
  1. get_real_home_listing.py +0 -117
get_real_home_listing.py DELETED
@@ -1,117 +0,0 @@
1
- from selenium import webdriver
2
- from selenium.webdriver.chrome.service import Service
3
- from selenium.webdriver.chrome.options import Options
4
- from selenium.webdriver.common.by import By
5
- from selenium.webdriver.support.ui import WebDriverWait
6
- from selenium.webdriver.support import expected_conditions as EC
7
- from selenium.common.exceptions import NoSuchElementException
8
- import json
9
- import pandas as pd
10
- import requests
11
- from bs4 import BeautifulSoup
12
-
13
def format_price(price):
    """Convert a price into a whole-number integer amount.

    Two input shapes are supported:

    * Real numbers (``int``, ``float`` and anything registered as
      ``numbers.Real``) are truncated toward zero. Previously these were
      stringified and sliced, which silently dropped the last two digits of
      an ``int`` (1500000 -> 15000) and mis-parsed floats with three or more
      decimals (1234.567 -> 12345).
    * Any other value is stringified, its last two characters are dropped,
      every ``.`` is removed and the rest is parsed as an integer, e.g.
      ``"1.500.000TL"`` -> ``1500000``.
      NOTE(review): the two dropped characters are presumably a currency
      suffix such as "TL" or a trailing ".0" — confirm against the caller.

    Args:
        price: The price as a number or as a formatted string.

    Returns:
        The price as an ``int``.

    Raises:
        ValueError: If the value cannot be interpreted as an integer
            (including NaN and infinite floats).
    """
    # Function-scope import so the module's import block stays untouched.
    import numbers

    if isinstance(price, numbers.Real):
        try:
            return int(price)
        except OverflowError as exc:
            # int(inf) raises OverflowError; keep ValueError as the single
            # failure type, matching what the string path raises.
            raise ValueError(f"cannot convert price {price!r} to an integer") from exc

    # String path: strip the two-character suffix, then the '.' separators.
    without_suffix = str(price)[:-2]
    return int(without_suffix.replace('.', ''))
18
-
19
-
20
def _scrape_listing_details(driver):
    """Read the attribute table, title, image URL and price of the open detail page.

    Raises ``NoSuchElementException`` if the attribute list, title, image or
    price element is missing, and ``ValueError`` if the price text is not an
    integer once ``.`` and ``TL`` are removed.
    """
    details = {}

    feature_list = driver.find_element(By.XPATH, '//*[@id="ilan-hakkinda"]/div/div/ul')
    for item in feature_list.find_elements(By.TAG_NAME, 'li'):
        try:
            key = item.find_element(By.CLASS_NAME, 'styles_key__VqMhC').text
            value = item.find_element(By.CLASS_NAME, 'styles_value__3QmL3').text
        except NoSuchElementException:
            # Rows without a key/value pair are skipped.
            continue
        details[key] = value

    # Locate everything before touching `details` so a missing element
    # leaves no half-filled record behind.
    title = driver.find_element(
        By.XPATH, '//*[@id="content-wrapper"]/div[2]/div[1]/div/h1'
    ).text
    image_url = driver.find_element(
        By.XPATH, '//*[@id="content-wrapper"]/div[2]/div[2]/div[2]/img'
    ).get_attribute('src')
    price_text = driver.find_element(
        By.XPATH, '//*[@id="genel-bakis"]/div[1]/div[1]/div[1]/div/span'
    ).text
    price = int(price_text.replace('.', '').replace('TL', ''))

    details['url'] = driver.current_url
    details['title'] = title
    details['resim_url'] = image_url
    details['price'] = price
    return details


def get_home_listings(selected_il, price_value, *, price_margin=500000, max_listings=10):
    """Scrape listings from emlakjet.com around a target price.

    Opens headless Chrome, types ``selected_il`` into the home-page search
    box, fills the price filter with ``price_value - price_margin`` and
    ``price_value + price_margin``, runs the search and then visits the
    first ``max_listings`` result links one by one.

    Args:
        selected_il: Text typed into the search input.
        price_value: Target price; normalised with ``format_price``.
        price_margin: Amount subtracted from / added to the target price to
            build the filter range. Defaults to 500000.
        max_listings: Maximum number of result links to visit. Defaults to 10.

    Returns:
        A list of dicts, one per successfully scraped listing. Each dict
        holds the key/value pairs of the listing's attribute list plus the
        keys ``url``, ``title``, ``resim_url`` (image URL) and ``price``.
        Listings whose detail page fails to load or parse are reported with
        ``print`` and skipped. If a result link is not found within 10
        seconds, scraping stops and what was collected so far is returned.

    Raises:
        ValueError: If ``price_value`` cannot be parsed by ``format_price``.
        selenium.common.exceptions.WebDriverException: If the search form
            itself cannot be located or operated (includes timeouts).
    """
    # Function-scope import: the module imports only NoSuchElementException.
    from selenium.common.exceptions import TimeoutException

    # NOTE(review): the ":r9:" / ":rh:" / ":ri:" ids look auto-generated by
    # the site's UI library and may change between deployments — confirm.
    panel_xpath = '//*[@id="headlessui-tabs-panel-:r9:"]'
    price_options_xpath = '//*[@id="headlessui-listbox-options-:ri:"]'
    results_xpath = '//*[@id="content-wrapper"]/div[1]/div[4]/div[2]/div[3]'

    # Parse the price before starting a browser so bad input fails fast
    # without leaving a Chrome process behind.
    price_value = format_price(price_value)
    lower_bound = max(price_value - price_margin, 0)  # never send a negative price
    upper_bound = price_value + price_margin

    chrome_options = Options()
    chrome_options.add_argument('--headless')  # Run Chrome in headless mode
    chrome_options.add_argument('--start-maximized')

    driver = webdriver.Chrome(options=chrome_options)
    data = []
    try:
        driver.get('https://www.emlakjet.com/')
        wait = WebDriverWait(driver, 10)

        # Type the search text.
        search_input = wait.until(EC.presence_of_element_located(
            (By.XPATH, f'{panel_xpath}/div/div[2]/div/div/div/input')
        ))
        search_input.send_keys(f'{selected_il}')

        # Open the price-range dropdown.
        driver.find_element(
            By.XPATH, '//*[@id="headlessui-listbox-button-:rh:"]'
        ).click()

        # ul[1] holds the minimum-price input, ul[2] the maximum-price input.
        for position, bound in ((1, lower_bound), (2, upper_bound)):
            price_input = wait.until(EC.presence_of_element_located(
                (By.XPATH, f'{price_options_xpath}/ul[{position}]/div[1]/div/div[1]/input')
            ))
            price_input.clear()
            price_input.send_keys(str(bound))

        # Run the search.
        wait.until(EC.element_to_be_clickable(
            (By.XPATH, f'{panel_xpath}/div/div[5]/div/button')
        )).click()

        for index in range(1, max_listings + 1):
            link_xpath = f'{results_xpath}/div[{index}]/div/a'
            try:
                link = wait.until(
                    EC.presence_of_element_located((By.XPATH, link_xpath))
                )
            except TimeoutException:
                # Fewer results than requested: keep what we already have
                # instead of discarding it with an exception.
                print(f"Listing {index} not found; stopping after {len(data)} result(s).")
                break

            driver.get(link.get_attribute('href'))

            try:
                # Wait for the features list to load; a timeout here only
                # skips this listing.
                WebDriverWait(driver, 3).until(
                    EC.presence_of_element_located((By.ID, "ilan-hakkinda"))
                )
                data.append(_scrape_listing_details(driver))
            except NoSuchElementException as e:
                print(f"Element not found: {e}")
            except Exception as e:
                # Deliberate best-effort boundary: one bad page must not
                # abort the whole run.
                print(f"An error occurred: {e}")

            # Return to the result list; the next link is looked up afresh.
            driver.execute_script("window.history.go(-1)")
    finally:
        # Always release the browser, even when a wait or lookup raised.
        driver.quit()

    return data