amaye15 committed on
Commit a976cb6 · 1 Parent(s): 8a2df9f

Debug - Recursion Error

Files changed (1)
1. main.py +58 -0
main.py CHANGED
@@ -474,18 +474,71 @@ async def crawl_sync(request: CrawlRequest) -> Dict[str, Any]:
     raise HTTPException(status_code=408, detail="Task timed out")
 
 
+# @app.post(
+#     "/crawl_direct", dependencies=[secure_endpoint()] if CRAWL4AI_API_TOKEN else []
+# )
+# async def crawl_direct(request: CrawlRequest) -> Dict[str, Any]:
+#     try:
+#         crawler = await crawler_service.crawler_pool.acquire(**request.crawler_params)
+#         extraction_strategy = crawler_service._create_extraction_strategy(
+#             request.extraction_config
+#         )
+
+#         try:
+#             if isinstance(request.urls, list):
+#                 results = await crawler.arun_many(
+#                     urls=[str(url) for url in request.urls],
+#                     extraction_strategy=extraction_strategy,
+#                     js_code=request.js_code,
+#                     wait_for=request.wait_for,
+#                     css_selector=request.css_selector,
+#                     screenshot=request.screenshot,
+#                     magic=request.magic,
+#                     cache_mode=request.cache_mode,
+#                     session_id=request.session_id,
+#                     **request.extra,
+#                 )
+#                 return {"results": [result.dict() for result in results]}
+#             else:
+#                 result = await crawler.arun(
+#                     url=str(request.urls),
+#                     extraction_strategy=extraction_strategy,
+#                     js_code=request.js_code,
+#                     wait_for=request.wait_for,
+#                     css_selector=request.css_selector,
+#                     screenshot=request.screenshot,
+#                     magic=request.magic,
+#                     cache_mode=request.cache_mode,
+#                     session_id=request.session_id,
+#                     **request.extra,
+#                 )
+#                 return {"result": result.dict()}
+#         finally:
+#             await crawler_service.crawler_pool.release(crawler)
+#     except Exception as e:
+#         logger.error(f"Error in direct crawl: {str(e)}")
+#         raise HTTPException(status_code=500, detail=str(e))
+
+
 @app.post(
     "/crawl_direct", dependencies=[secure_endpoint()] if CRAWL4AI_API_TOKEN else []
 )
 async def crawl_direct(request: CrawlRequest) -> Dict[str, Any]:
+    logger.info("Received request to crawl directly.")
     try:
+        logger.debug("Acquiring crawler from the crawler pool.")
         crawler = await crawler_service.crawler_pool.acquire(**request.crawler_params)
+        logger.debug("Crawler acquired successfully.")
+
+        logger.debug("Creating extraction strategy based on the request configuration.")
         extraction_strategy = crawler_service._create_extraction_strategy(
             request.extraction_config
         )
+        logger.debug("Extraction strategy created successfully.")
 
         try:
             if isinstance(request.urls, list):
+                logger.info("Processing multiple URLs.")
                 results = await crawler.arun_many(
                     urls=[str(url) for url in request.urls],
                     extraction_strategy=extraction_strategy,
@@ -498,8 +551,10 @@ async def crawl_direct(request: CrawlRequest) -> Dict[str, Any]:
                     session_id=request.session_id,
                     **request.extra,
                 )
+                logger.info("Crawling completed for multiple URLs.")
                 return {"results": [result.dict() for result in results]}
             else:
+                logger.info("Processing a single URL.")
                 result = await crawler.arun(
                     url=str(request.urls),
                     extraction_strategy=extraction_strategy,
@@ -512,9 +567,12 @@ async def crawl_direct(request: CrawlRequest) -> Dict[str, Any]:
                     session_id=request.session_id,
                     **request.extra,
                 )
+                logger.info("Crawling completed for a single URL.")
                 return {"result": result.dict()}
         finally:
+            logger.debug("Releasing crawler back to the pool.")
             await crawler_service.crawler_pool.release(crawler)
+            logger.debug("Crawler released successfully.")
     except Exception as e:
         logger.error(f"Error in direct crawl: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
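For reference, a minimal client sketch (not part of the commit) that exercises both branches of the instrumented endpoint. It assumes the service runs at http://localhost:8000, that CrawlRequest accepts urls as either a single URL or a list (as the isinstance check in the handler implies), that the remaining request fields are optional, and that secure_endpoint() expects a bearer token when CRAWL4AI_API_TOKEN is set; none of those details are shown in this diff.

# Hypothetical client for the /crawl_direct endpoint above (assumptions noted in the lead-in).
import os

import requests

BASE_URL = "http://localhost:8000"  # assumed host/port
headers = {}
token = os.getenv("CRAWL4AI_API_TOKEN")
if token:
    headers["Authorization"] = f"Bearer {token}"  # assumed auth scheme for secure_endpoint()

# Single URL: the handler should take the crawler.arun() branch
# and log "Processing a single URL."
single = requests.post(
    f"{BASE_URL}/crawl_direct",
    json={"urls": "https://example.com"},
    headers=headers,
    timeout=120,
)
print(single.status_code, list(single.json().keys()))  # expect ["result"]

# List of URLs: the handler should take the crawler.arun_many() branch
# and log "Processing multiple URLs."
many = requests.post(
    f"{BASE_URL}/crawl_direct",
    json={"urls": ["https://example.com", "https://example.org"]},
    headers=headers,
    timeout=300,
)
print(many.status_code, list(many.json().keys()))  # expect ["results"]

With the server's logger set to DEBUG, the breadcrumbs added in this commit bracket the pool acquire/release, the extraction-strategy setup, and the arun/arun_many calls, which should help narrow down where the recursion error is raised.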