Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	
		FauziIsyrinApridal
		
	committed on
		
		
					Commit 
							
							·
						
						edaa275
	
1
								Parent(s):
							
							5923179
								
..
Browse files
- scrapping/dosen_scrap.py +2 -2
- scrapping/jadwal_scrap.py +2 -2
- scrapping/jurusan_scrap.py +3 -3
- scrapping/pnp_scrap.py +4 -1
    	
        scrapping/dosen_scrap.py
    CHANGED
    
    | @@ -11,12 +11,12 @@ class DosenSpider(scrapy.Spider): | |
| 11 | 
             
                start_urls = ['https://sipeg.pnp.ac.id/']
         | 
| 12 |  | 
| 13 | 
             
                custom_settings = {
         | 
| 14 | 
            -
                    'DOWNLOAD_DELAY':  | 
| 15 | 
             
                    'USER_AGENT': 'PNPBot/1.0',
         | 
| 16 | 
             
                    'ROBOTSTXT_OBEY': True,
         | 
| 17 | 
             
                    'LOG_LEVEL': 'INFO',
         | 
| 18 | 
             
                    'CONCURRENT_REQUESTS': 1,
         | 
| 19 | 
            -
                    ' | 
| 20 | 
             
                    'RETRY_TIMES': 3
         | 
| 21 | 
             
                }
         | 
| 22 |  | 
|  | |
| 11 | 
             
                start_urls = ['https://sipeg.pnp.ac.id/']
         | 
| 12 |  | 
| 13 | 
             
                custom_settings = {
         | 
| 14 | 
            +
                    'DOWNLOAD_DELAY': 1,
         | 
| 15 | 
             
                    'USER_AGENT': 'PNPBot/1.0',
         | 
| 16 | 
             
                    'ROBOTSTXT_OBEY': True,
         | 
| 17 | 
             
                    'LOG_LEVEL': 'INFO',
         | 
| 18 | 
             
                    'CONCURRENT_REQUESTS': 1,
         | 
| 19 | 
            +
                    'HTTPCACHE_ENABLED': False,
         | 
| 20 | 
             
                    'RETRY_TIMES': 3
         | 
| 21 | 
             
                }
         | 
| 22 |  | 
    	
        scrapping/jadwal_scrap.py
    CHANGED
    
    | @@ -348,12 +348,12 @@ class PnpSpider(scrapy.Spider): | |
| 348 |  | 
| 349 | 
             
            if __name__ == "__main__":
         | 
| 350 | 
             
                process = CrawlerProcess(settings={
         | 
| 351 | 
            -
                    'DOWNLOAD_DELAY':  | 
| 352 | 
             
                    'USER_AGENT': 'PNPBot/1.0',
         | 
| 353 | 
             
                    'ROBOTSTXT_OBEY': True,
         | 
| 354 | 
             
                    'LOG_LEVEL': 'INFO',
         | 
|  | |
| 355 | 
             
                    'CONCURRENT_REQUESTS': 1,
         | 
| 356 | 
            -
                    'DOWNLOAD_TIMEOUT': 100,
         | 
| 357 | 
             
                    'RETRY_TIMES': 3
         | 
| 358 | 
             
                })
         | 
| 359 | 
             
                process.crawl(PnpSpider)
         | 
|  | |
| 348 |  | 
| 349 | 
             
            if __name__ == "__main__":
         | 
| 350 | 
             
                process = CrawlerProcess(settings={
         | 
| 351 | 
            +
                    'DOWNLOAD_DELAY': 1,
         | 
| 352 | 
             
                    'USER_AGENT': 'PNPBot/1.0',
         | 
| 353 | 
             
                    'ROBOTSTXT_OBEY': True,
         | 
| 354 | 
             
                    'LOG_LEVEL': 'INFO',
         | 
| 355 | 
            +
                    'HTTPCACHE_ENABLED': False,
         | 
| 356 | 
             
                    'CONCURRENT_REQUESTS': 1,
         | 
|  | |
| 357 | 
             
                    'RETRY_TIMES': 3
         | 
| 358 | 
             
                })
         | 
| 359 | 
             
                process.crawl(PnpSpider)
         | 
    	
        scrapping/jurusan_scrap.py
    CHANGED
    
    | @@ -23,12 +23,12 @@ def is_valid_prodi(nama): | |
| 23 | 
             
            class JurusanSpider(scrapy.Spider):
         | 
| 24 | 
             
                name = "jurusan"
         | 
| 25 | 
             
                custom_settings = {
         | 
| 26 | 
            -
                    'DOWNLOAD_DELAY':  | 
| 27 | 
             
                    'USER_AGENT': 'PNPBot/1.0',
         | 
| 28 | 
             
                    'ROBOTSTXT_OBEY': True,
         | 
| 29 | 
             
                    'LOG_LEVEL': 'INFO',
         | 
| 30 | 
            -
                    ' | 
| 31 | 
            -
                    ' | 
| 32 | 
             
                    'RETRY_TIMES': 3
         | 
| 33 | 
             
                }
         | 
| 34 |  | 
|  | |
| 23 | 
             
            class JurusanSpider(scrapy.Spider):
         | 
| 24 | 
             
                name = "jurusan"
         | 
| 25 | 
             
                custom_settings = {
         | 
| 26 | 
            +
                    'DOWNLOAD_DELAY': 1,
         | 
| 27 | 
             
                    'USER_AGENT': 'PNPBot/1.0',
         | 
| 28 | 
             
                    'ROBOTSTXT_OBEY': True,
         | 
| 29 | 
             
                    'LOG_LEVEL': 'INFO',
         | 
| 30 | 
            +
                    'HTTPCACHE_ENABLED': False,
         | 
| 31 | 
            +
                    'CONCURRENT_REQUESTS': 1,
         | 
| 32 | 
             
                    'RETRY_TIMES': 3
         | 
| 33 | 
             
                }
         | 
| 34 |  | 
    	
        scrapping/pnp_scrap.py
    CHANGED
    
    | @@ -27,9 +27,12 @@ class PNPContentSpider(scrapy.Spider): | |
| 27 | 
             
                ]
         | 
| 28 |  | 
| 29 | 
             
                custom_settings = {
         | 
| 30 | 
            -
                    'DOWNLOAD_DELAY':  | 
| 31 | 
             
                    'RETRY_TIMES': 3,
         | 
| 32 | 
             
                    'HTTPCACHE_ENABLED': False,
         | 
|  | |
|  | |
|  | |
| 33 | 
             
                    'USER_AGENT': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
         | 
| 34 | 
             
                }
         | 
| 35 |  | 
|  | |
| 27 | 
             
                ]
         | 
| 28 |  | 
| 29 | 
             
                custom_settings = {
         | 
| 30 | 
            +
                    'DOWNLOAD_DELAY': 1,
         | 
| 31 | 
             
                    'RETRY_TIMES': 3,
         | 
| 32 | 
             
                    'HTTPCACHE_ENABLED': False,
         | 
| 33 | 
            +
                    'ROBOTSTXT_OBEY': True,
         | 
| 34 | 
            +
                    'CONCURRENT_REQUESTS': 1,
         | 
| 35 | 
            +
                    'RETRY_ENABLED': True,  
         | 
| 36 | 
             
                    'USER_AGENT': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
         | 
| 37 | 
             
                }
         | 
| 38 |  | 
