github-actions[bot] committed
Commit 982cb95 · 1 Parent(s): 31086ae

Auto-sync from demo at Tue Dec 16 12:06:58 UTC 2025

graphgen/common/init_llm.py CHANGED
@@ -131,7 +131,7 @@ class LLMFactory:
             ray.get_actor(actor_name)
         except ValueError:
             print(f"Creating Ray actor for LLM {model_type} with backend {backend}.")
-            num_gpus = int(config.pop("num_gpus", 0))
+            num_gpus = float(config.pop("num_gpus", 0))
             actor = (
                 ray.remote(LLMServiceActor)
                 .options(
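The switch from int() to float() matters because Ray accepts fractional GPU reservations: int() would either truncate a share like 0.5 down to 0 or raise on a string value, so the actor would never request its GPU slice. A minimal sketch of the pattern, assuming a placeholder actor class (LLMServiceActor and the real config loading are not reproduced here):

import ray

ray.init(ignore_reinit_error=True)

@ray.remote
class DummyActor:  # stand-in for LLMServiceActor, purely illustrative
    def ping(self) -> str:
        return "ok"

config = {"num_gpus": "0.5"}                 # assumption: values may arrive as strings from YAML/env
num_gpus = float(config.pop("num_gpus", 0))  # float preserves fractional GPU shares
# int("0.5") raises ValueError, and int(0.5) truncates the share to 0

# The actor is only scheduled once half a GPU is actually free in the cluster.
actor = DummyActor.options(name="demo_llm_actor", num_gpus=num_gpus).remote()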
graphgen/models/llm/local/vllm_wrapper.py CHANGED
@@ -33,8 +33,8 @@ class VLLMWrapper(BaseLLMWrapper):
 
         engine_args = AsyncEngineArgs(
             model=model,
-            tensor_parallel_size=tensor_parallel_size,
-            gpu_memory_utilization=gpu_memory_utilization,
+            tensor_parallel_size=int(tensor_parallel_size),
+            gpu_memory_utilization=float(gpu_memory_utilization),
             trust_remote_code=kwargs.get("trust_remote_code", True),
             disable_log_stats=False,
         )
@@ -82,15 +82,15 @@ class VLLMWrapper(BaseLLMWrapper):
 
     async def generate_topk_per_token(
         self, text: str, history: Optional[List[str]] = None, **extra: Any
-    ) -> List[Token]:
+    ) -> List[Token]:
         full_prompt = self._build_inputs(text, history)
-
         request_id = f"graphgen_topk_{uuid.uuid4()}"
 
         sp = self.SamplingParams(
             temperature=0,
             max_tokens=1,
             logprobs=self.topk,
+            prompt_logprobs=1,
         )
 
         result_generator = self.engine.generate(full_prompt, sp, request_id=request_id)
@@ -108,14 +108,22 @@ class VLLMWrapper(BaseLLMWrapper):
 
         top_logprobs = final_output.outputs[0].logprobs[0]
 
-        tokens = []
+        candidate_tokens = []
         for _, logprob_obj in top_logprobs.items():
-            tok_str = logprob_obj.decoded_token
+            tok_str = logprob_obj.decoded_token.strip() if logprob_obj.decoded_token else ""
             prob = float(math.exp(logprob_obj.logprob))
-            tokens.append(Token(tok_str, prob))
-
-        tokens.sort(key=lambda x: -x.prob)
-        return tokens
+            candidate_tokens.append(Token(tok_str, prob))
+
+        candidate_tokens.sort(key=lambda x: -x.prob)
+
+        if candidate_tokens:
+            main_token = Token(
+                text=candidate_tokens[0].text,
+                prob=candidate_tokens[0].prob,
+                top_candidates=candidate_tokens
+            )
+            return [main_token]
+        return []
 
     async def generate_inputs_prob(
         self, text: str, history: Optional[List[str]] = None, **extra: Any
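The last hunk also reshapes the return value of generate_topk_per_token: instead of a flat list of candidates, the method now returns a single Token for the sampled position that carries the ranked alternatives in top_candidates. A standalone sketch of that aggregation, with the Token and Logprob shapes inferred from the diff rather than taken from the repo:

import math
from dataclasses import dataclass, field
from typing import Dict, List

@dataclass
class Logprob:
    # Stand-in for vLLM's per-candidate logprob entry: log-probability plus decoded text.
    logprob: float
    decoded_token: str

@dataclass
class Token:
    # Inferred shape; the real graphgen Token model may differ.
    text: str
    prob: float
    top_candidates: List["Token"] = field(default_factory=list)

def aggregate_topk(top_logprobs: Dict[int, Logprob]) -> List[Token]:
    """Collect candidates, rank by probability, and return one main Token."""
    candidates = [
        Token((lp.decoded_token or "").strip(), float(math.exp(lp.logprob)))
        for lp in top_logprobs.values()
    ]
    candidates.sort(key=lambda t: -t.prob)
    if not candidates:
        return []
    best = candidates[0]
    return [Token(text=best.text, prob=best.prob, top_candidates=candidates)]

# Example with fake logprobs for one generated position:
fake = {1: Logprob(-0.1, " Paris"), 2: Logprob(-2.3, " Lyon")}
print(aggregate_topk(fake))  # one Token("Paris", ~0.90) carrying both candidates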
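The prompt_logprobs=1 added to SamplingParams asks vLLM to also return a log-probability for each prompt token, not just the top-k candidates of the generated one. A minimal sketch of that request using vLLM's offline API, assuming a GPU is available; the model name is a placeholder, not the repo's default:

from vllm import LLM, SamplingParams

llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct", gpu_memory_utilization=0.9)

sp = SamplingParams(
    temperature=0,
    max_tokens=1,
    logprobs=5,          # top-k candidates for the next token
    prompt_logprobs=1,   # also return log-probs for each prompt token
)

out = llm.generate(["The capital of France is"], sp)[0]
print(out.outputs[0].logprobs[0])  # dict: token_id -> Logprob for the generated position
print(out.prompt_logprobs)         # per-prompt-token log-prob info (None for the first token)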