Commit · 982cb95
github-actions[bot] committed
1 Parent(s): 31086ae

Auto-sync from demo at Tue Dec 16 12:06:58 UTC 2025
graphgen/common/init_llm.py
CHANGED

@@ -131,7 +131,7 @@ class LLMFactory:
             ray.get_actor(actor_name)
         except ValueError:
             print(f"Creating Ray actor for LLM {model_type} with backend {backend}.")
-            num_gpus =
+            num_gpus = float(config.pop("num_gpus", 0))
             actor = (
                 ray.remote(LLMServiceActor)
                 .options(
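The new line pops num_gpus out of the config and casts it to float before the actor is created, presumably so a value that arrives as a string (for example from a YAML config) can be handed to Ray's resource options. A minimal sketch of that pattern, with a stand-in LLMServiceActor and actor name rather than the repository's actual ones:

import ray


class LLMServiceActor:  # hypothetical stand-in for the real service actor
    def __init__(self, **config):
        self.config = config


def get_or_create_actor(actor_name: str, config: dict):
    ray.init(ignore_reinit_error=True)
    try:
        return ray.get_actor(actor_name)
    except ValueError:
        # Config values may come in as strings, hence the float() cast; popping
        # the key keeps num_gpus out of the kwargs forwarded to the actor itself.
        num_gpus = float(config.pop("num_gpus", 0))
        return (
            ray.remote(LLMServiceActor)
            .options(name=actor_name, num_gpus=num_gpus)
            .remote(**config)
        )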
graphgen/models/llm/local/vllm_wrapper.py
CHANGED

@@ -33,8 +33,8 @@ class VLLMWrapper(BaseLLMWrapper):

         engine_args = AsyncEngineArgs(
             model=model,
-            tensor_parallel_size=tensor_parallel_size,
-            gpu_memory_utilization=gpu_memory_utilization,
+            tensor_parallel_size=int(tensor_parallel_size),
+            gpu_memory_utilization=float(gpu_memory_utilization),
             trust_remote_code=kwargs.get("trust_remote_code", True),
             disable_log_stats=False,
         )

@@ -82,15 +82,15 @@ class VLLMWrapper(BaseLLMWrapper):

     async def generate_topk_per_token(
         self, text: str, history: Optional[List[str]] = None, **extra: Any
-
+    ) -> List[Token]:
         full_prompt = self._build_inputs(text, history)
-
         request_id = f"graphgen_topk_{uuid.uuid4()}"

         sp = self.SamplingParams(
             temperature=0,
             max_tokens=1,
             logprobs=self.topk,
+            prompt_logprobs=1,
         )

         result_generator = self.engine.generate(full_prompt, sp, request_id=request_id)

@@ -108,14 +108,22 @@ class VLLMWrapper(BaseLLMWrapper):

         top_logprobs = final_output.outputs[0].logprobs[0]

-
+        candidate_tokens = []
         for _, logprob_obj in top_logprobs.items():
-            tok_str = logprob_obj.decoded_token
+            tok_str = logprob_obj.decoded_token.strip() if logprob_obj.decoded_token else ""
             prob = float(math.exp(logprob_obj.logprob))
-
-
-
-
+            candidate_tokens.append(Token(tok_str, prob))
+
+        candidate_tokens.sort(key=lambda x: -x.prob)
+
+        if candidate_tokens:
+            main_token = Token(
+                text=candidate_tokens[0].text,
+                prob=candidate_tokens[0].prob,
+                top_candidates=candidate_tokens
+            )
+            return [main_token]
+        return []

     async def generate_inputs_prob(
         self, text: str, history: Optional[List[str]] = None, **extra: Any
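Beyond the int()/float() casts on the engine arguments (again guarding against string-typed config values), the substantive change is in generate_topk_per_token: the decoded top-k logprobs are now collected into candidate Token objects, sorted by probability, and returned as a single main token that carries the full candidate list. A self-contained sketch of that selection logic, assuming a simple Token dataclass with the text/prob/top_candidates fields the diff implies (the real class is defined elsewhere in graphgen):

import math
from dataclasses import dataclass, field
from typing import List


@dataclass
class Token:  # assumed shape; mirrors how the diff constructs Token(...)
    text: str
    prob: float
    top_candidates: List["Token"] = field(default_factory=list)


def pick_main_token(top_logprobs: dict) -> List[Token]:
    # top_logprobs maps token ids to vLLM Logprob objects, which expose
    # .decoded_token and .logprob, as used in the diff above.
    candidate_tokens = []
    for _, logprob_obj in top_logprobs.items():
        tok_str = logprob_obj.decoded_token.strip() if logprob_obj.decoded_token else ""
        prob = float(math.exp(logprob_obj.logprob))
        candidate_tokens.append(Token(tok_str, prob))

    candidate_tokens.sort(key=lambda x: -x.prob)
    if not candidate_tokens:
        return []
    best = candidate_tokens[0]
    return [Token(text=best.text, prob=best.prob, top_candidates=candidate_tokens)]

The added prompt_logprobs=1 in SamplingParams asks vLLM to also return log-probabilities for the prompt tokens, which the neighbouring generate_inputs_prob method presumably consumes.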