from api.models import GENERATE_ENGINE
from api.utils.request import llama_outer_lock, llama_inner_lock


def get_llama_cpp_engine():
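    """Yield the shared GENERATE_ENGINE, serializing access across requests.

    Written as a generator so it can be used as a yielding dependency
    (e.g. with FastAPI's ``Depends``, if that is how the API wires it up).
    """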
    # NOTE: This outer/inner double lock lets the request currently
    # streaming from the model check, on its own thread, whether any other
    # request is pending (a waiter holds the outer lock) and cancel the
    # stream if so.
    llama_outer_lock.acquire()
    release_outer_lock = True
    try:
        llama_inner_lock.acquire()
        try:
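            # Hand-off: release the outer lock so the next request can
            # acquire it and, by holding it while blocked on the inner
            # lock, signal that it is waiting for the engine.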
            llama_outer_lock.release()
            release_outer_lock = False
            yield GENERATE_ENGINE
        finally:
            llama_inner_lock.release()
    finally:
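        # Release the outer lock only if the hand-off never happened
        # (e.g. acquiring the inner lock raised an exception).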
        if release_outer_lock:
            llama_outer_lock.release()
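

# Illustrative sketch (not wired into the API): how a streaming handler
# could cooperate with the double lock above. `engine.generate` and the
# function name are assumptions; only `get_llama_cpp_engine` and
# `llama_outer_lock` come from this module.
def _example_stream_tokens(prompt: str):
    engine_gen = get_llama_cpp_engine()
    engine = next(engine_gen)  # run the dependency up to its yield
    try:
        for token in engine.generate(prompt):  # assumed engine method
            # A pending request holds the outer lock while it blocks on
            # the inner lock, so a locked outer lock means someone waits.
            if llama_outer_lock.locked():
                break  # cancel this stream so the waiter can proceed
            yield token
    finally:
        # Resume the dependency past its yield so its finally block
        # runs and the inner lock is released.
        engine_gen.close()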