amaye15 committed
Commit a976cb6
1 Parent(s): 8a2df9f
Debug - Recursion Error
main.py CHANGED
@@ -474,18 +474,71 @@ async def crawl_sync(request: CrawlRequest) -> Dict[str, Any]:
         raise HTTPException(status_code=408, detail="Task timed out")


+# @app.post(
+#     "/crawl_direct", dependencies=[secure_endpoint()] if CRAWL4AI_API_TOKEN else []
+# )
+# async def crawl_direct(request: CrawlRequest) -> Dict[str, Any]:
+#     try:
+#         crawler = await crawler_service.crawler_pool.acquire(**request.crawler_params)
+#         extraction_strategy = crawler_service._create_extraction_strategy(
+#             request.extraction_config
+#         )
+
+#         try:
+#             if isinstance(request.urls, list):
+#                 results = await crawler.arun_many(
+#                     urls=[str(url) for url in request.urls],
+#                     extraction_strategy=extraction_strategy,
+#                     js_code=request.js_code,
+#                     wait_for=request.wait_for,
+#                     css_selector=request.css_selector,
+#                     screenshot=request.screenshot,
+#                     magic=request.magic,
+#                     cache_mode=request.cache_mode,
+#                     session_id=request.session_id,
+#                     **request.extra,
+#                 )
+#                 return {"results": [result.dict() for result in results]}
+#             else:
+#                 result = await crawler.arun(
+#                     url=str(request.urls),
+#                     extraction_strategy=extraction_strategy,
+#                     js_code=request.js_code,
+#                     wait_for=request.wait_for,
+#                     css_selector=request.css_selector,
+#                     screenshot=request.screenshot,
+#                     magic=request.magic,
+#                     cache_mode=request.cache_mode,
+#                     session_id=request.session_id,
+#                     **request.extra,
+#                 )
+#                 return {"result": result.dict()}
+#         finally:
+#             await crawler_service.crawler_pool.release(crawler)
+#     except Exception as e:
+#         logger.error(f"Error in direct crawl: {str(e)}")
+#         raise HTTPException(status_code=500, detail=str(e))
+
+
 @app.post(
     "/crawl_direct", dependencies=[secure_endpoint()] if CRAWL4AI_API_TOKEN else []
 )
 async def crawl_direct(request: CrawlRequest) -> Dict[str, Any]:
+    logger.info("Received request to crawl directly.")
     try:
+        logger.debug("Acquiring crawler from the crawler pool.")
         crawler = await crawler_service.crawler_pool.acquire(**request.crawler_params)
+        logger.debug("Crawler acquired successfully.")
+
+        logger.debug("Creating extraction strategy based on the request configuration.")
         extraction_strategy = crawler_service._create_extraction_strategy(
             request.extraction_config
         )
+        logger.debug("Extraction strategy created successfully.")

         try:
             if isinstance(request.urls, list):
+                logger.info("Processing multiple URLs.")
                 results = await crawler.arun_many(
                     urls=[str(url) for url in request.urls],
                     extraction_strategy=extraction_strategy,
@@ -498,8 +551,10 @@ async def crawl_direct(request: CrawlRequest) -> Dict[str, Any]:
                     session_id=request.session_id,
                     **request.extra,
                 )
+                logger.info("Crawling completed for multiple URLs.")
                 return {"results": [result.dict() for result in results]}
             else:
+                logger.info("Processing a single URL.")
                 result = await crawler.arun(
                     url=str(request.urls),
                     extraction_strategy=extraction_strategy,
@@ -512,9 +567,12 @@ async def crawl_direct(request: CrawlRequest) -> Dict[str, Any]:
                     session_id=request.session_id,
                     **request.extra,
                 )
+                logger.info("Crawling completed for a single URL.")
                 return {"result": result.dict()}
         finally:
+            logger.debug("Releasing crawler back to the pool.")
             await crawler_service.crawler_pool.release(crawler)
+            logger.debug("Crawler released successfully.")
     except Exception as e:
         logger.error(f"Error in direct crawl: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
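
Most of the new instrumentation logs at DEBUG level, so it stays invisible unless the module-level logger is actually configured for DEBUG output. A minimal sketch of how that might be enabled, assuming the logger is a standard-library logging logger and the app is served with uvicorn (both are assumptions about this repo's setup, not something the diff confirms):

    import logging

    # Emit DEBUG records (e.g. "Acquiring crawler from the crawler pool.") to the console.
    logging.basicConfig(level=logging.DEBUG)

    # If the service is launched with uvicorn, its own log level can be lowered as well:
    #   uvicorn main:app --log-level debug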
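
To actually trigger these log lines (and, with luck, the recursion error the commit message refers to), the /crawl_direct endpoint needs a real request. A rough sketch with httpx follows, assuming the service listens locally on port 8000, that CrawlRequest's JSON fields match the attribute names used in the handler (urls, js_code, wait_for, css_selector, screenshot, magic, cache_mode, session_id, extra, crawler_params, extraction_config), and that secure_endpoint() expects a bearer token when CRAWL4AI_API_TOKEN is set; host, port, and auth scheme are guesses, not taken from the repo:

    import os
    import httpx

    # A single URL string exercises the arun() branch; a list of URLs would hit arun_many().
    payload = {"urls": "https://example.com"}

    headers = {}
    token = os.getenv("CRAWL4AI_API_TOKEN")
    if token:
        # Assumption: secure_endpoint() validates an "Authorization: Bearer ..." header.
        headers["Authorization"] = f"Bearer {token}"

    resp = httpx.post(
        "http://localhost:8000/crawl_direct",
        json=payload,
        headers=headers,
        timeout=120.0,
    )
    print(resp.status_code, resp.json())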