FauziIsyrinApridal committed on
Commit
edaa275
·
1 Parent(s): 5923179
scrapping/dosen_scrap.py CHANGED
@@ -11,12 +11,12 @@ class DosenSpider(scrapy.Spider):
11
  start_urls = ['https://sipeg.pnp.ac.id/']
12
 
13
  custom_settings = {
14
- 'DOWNLOAD_DELAY': 2,
15
  'USER_AGENT': 'PNPBot/1.0',
16
  'ROBOTSTXT_OBEY': True,
17
  'LOG_LEVEL': 'INFO',
18
  'CONCURRENT_REQUESTS': 1,
19
- 'DOWNLOAD_TIMEOUT': 100,
20
  'RETRY_TIMES': 3
21
  }
22
 
 
11
  start_urls = ['https://sipeg.pnp.ac.id/']
12
 
13
  custom_settings = {
14
+ 'DOWNLOAD_DELAY': 1,
15
  'USER_AGENT': 'PNPBot/1.0',
16
  'ROBOTSTXT_OBEY': True,
17
  'LOG_LEVEL': 'INFO',
18
  'CONCURRENT_REQUESTS': 1,
19
+ 'HTTPCACHE_ENABLED': False,
20
  'RETRY_TIMES': 3
21
  }
22
 
scrapping/jadwal_scrap.py CHANGED
@@ -348,12 +348,12 @@ class PnpSpider(scrapy.Spider):
348
 
349
  if __name__ == "__main__":
350
  process = CrawlerProcess(settings={
351
- 'DOWNLOAD_DELAY': 2,
352
  'USER_AGENT': 'PNPBot/1.0',
353
  'ROBOTSTXT_OBEY': True,
354
  'LOG_LEVEL': 'INFO',
 
355
  'CONCURRENT_REQUESTS': 1,
356
- 'DOWNLOAD_TIMEOUT': 100,
357
  'RETRY_TIMES': 3
358
  })
359
  process.crawl(PnpSpider)
 
348
 
349
  if __name__ == "__main__":
350
  process = CrawlerProcess(settings={
351
+ 'DOWNLOAD_DELAY': 1,
352
  'USER_AGENT': 'PNPBot/1.0',
353
  'ROBOTSTXT_OBEY': True,
354
  'LOG_LEVEL': 'INFO',
355
+ 'HTTPCACHE_ENABLED': False,
356
  'CONCURRENT_REQUESTS': 1,
 
357
  'RETRY_TIMES': 3
358
  })
359
  process.crawl(PnpSpider)
scrapping/jurusan_scrap.py CHANGED
@@ -23,12 +23,12 @@ def is_valid_prodi(nama):
23
  class JurusanSpider(scrapy.Spider):
24
  name = "jurusan"
25
  custom_settings = {
26
- 'DOWNLOAD_DELAY': 0.5,
27
  'USER_AGENT': 'PNPBot/1.0',
28
  'ROBOTSTXT_OBEY': True,
29
  'LOG_LEVEL': 'INFO',
30
- 'CONCURRENT_REQUESTS': 4,
31
- 'DOWNLOAD_TIMEOUT': 100,
32
  'RETRY_TIMES': 3
33
  }
34
 
 
23
  class JurusanSpider(scrapy.Spider):
24
  name = "jurusan"
25
  custom_settings = {
26
+ 'DOWNLOAD_DELAY': 1,
27
  'USER_AGENT': 'PNPBot/1.0',
28
  'ROBOTSTXT_OBEY': True,
29
  'LOG_LEVEL': 'INFO',
30
+ 'HTTPCACHE_ENABLED': False,
31
+ 'CONCURRENT_REQUESTS': 1,
32
  'RETRY_TIMES': 3
33
  }
34
 
scrapping/pnp_scrap.py CHANGED
@@ -27,9 +27,12 @@ class PNPContentSpider(scrapy.Spider):
27
  ]
28
 
29
  custom_settings = {
30
- 'DOWNLOAD_DELAY': 2,
31
  'RETRY_TIMES': 3,
32
  'HTTPCACHE_ENABLED': False,
 
 
 
33
  'USER_AGENT': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
34
  }
35
 
 
27
  ]
28
 
29
  custom_settings = {
30
+ 'DOWNLOAD_DELAY': 1,
31
  'RETRY_TIMES': 3,
32
  'HTTPCACHE_ENABLED': False,
33
+ 'ROBOTSTXT_OBEY': True,
34
+ 'CONCURRENT_REQUESTS': 1,
35
+ 'RETRY_ENABLED': True,
36
  'USER_AGENT': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
37
  }
38