Spaces:

zerogpu-aoti
/

FLUX.1-dev-fa3-aoti

Running on Zero

cbensimon HF Staff committed on Jun 27

Commit

4d0c9f3

1 Parent(s): 04ce204

Brainstorming backup

Files changed (1) hide show

app.py CHANGED Viewed

@@ -61,6 +61,21 @@ compiled_model: AOTICompiledModel | None = None
 @spaces.GPU
 def run_model():
     global compiled_model
     if compiled_model is None:
         compiled_model = torch._inductor.aoti_load_package(package_path)

 @spaces.GPU
 def run_model():
+    # TODO: compiled model loading should actually go in worker init ...args: (path, weights)
+    # Something like: @spaces.GPU(aoti_load=(package_path, weights))
+    # It will probably solve the Driver runtime error when idle-reusing
+    # And avoids manually handling state with global
+    # Ou autrement :
+    #    pipeline.transformer = ZeroGPUCompiledModel(pt2_path, weights)
+    # Puis les instances de ZeroGPUCompiledModel sont chargées automatiquement pendant le worker init
+    # C'est encore mieux ça (je crois que c'était l'idée que j'avais de base)
+    # Une interface encore plus high-level ce serait :
+    #    pipeline.transformer = ZeroGPUCompile(pipeline.transformer, kwargs=example_kwargs)
+    # Et la compilation avec @spaces.GPU, le packaging, les weights séparées, etc.
+    # Tout ça serait géré automatiquement
+    # Bon mais faut laisser plusieurs niveaux d'abstraction je pense
+    # Et peut-être commencer par le low-level (voire pas d'helper du tout et tout en mode manuel mais pour le moment j'ai une driver context runtime error)
+    # Je vais quand même pouvoir trouver un niveau d'abstraction idéal
     global compiled_model
     if compiled_model is None:
         compiled_model = torch._inductor.aoti_load_package(package_path)