jonahkall committed (verified)
Commit 4c346eb · Parent(s): d02cab9

Upload 51 files

Files changed (50)
  1. .gitattributes +3 -0
  2. CITATION.cff +66 -0
  3. LICENSE +201 -0
  4. README.md +207 -16
  5. app.py +78 -0
  6. docs/adding_tokens.ipynb +129 -0
  7. docs/assets/ether0_logo.svg +13 -0
  8. docs/assets/training_info.png +3 -0
  9. docs/assets/valid_molecule.svg +211 -0
  10. docs/updated_mistral_chat_template.jinja +22 -0
  11. gradio_test.py +12 -0
  12. packages/remotes/.DS_Store +0 -0
  13. packages/remotes/.gitignore +2 -0
  14. packages/remotes/LICENSE +201 -0
  15. packages/remotes/README.md +38 -0
  16. packages/remotes/pyproject.toml +59 -0
  17. packages/remotes/src/ether0.remotes.egg-info/PKG-INFO +274 -0
  18. packages/remotes/src/ether0.remotes.egg-info/SOURCES.txt +14 -0
  19. packages/remotes/src/ether0.remotes.egg-info/dependency_links.txt +1 -0
  20. packages/remotes/src/ether0.remotes.egg-info/entry_points.txt +2 -0
  21. packages/remotes/src/ether0.remotes.egg-info/requires.txt +16 -0
  22. packages/remotes/src/ether0.remotes.egg-info/top_level.txt +1 -0
  23. packages/remotes/src/ether0/server.py +316 -0
  24. packages/remotes/tests/conftest.py +29 -0
  25. packages/remotes/tests/test_clients.py +91 -0
  26. packages/remotes/tests/test_rewards.py +108 -0
  27. pyproject.toml +546 -0
  28. src/.DS_Store +0 -0
  29. src/ether0.egg-info/PKG-INFO +467 -0
  30. src/ether0.egg-info/SOURCES.txt +46 -0
  31. src/ether0.egg-info/dependency_links.txt +1 -0
  32. src/ether0.egg-info/requires.txt +40 -0
  33. src/ether0.egg-info/top_level.txt +1 -0
  34. src/ether0/chat.py +177 -0
  35. src/ether0/clients.py +163 -0
  36. src/ether0/data.py +225 -0
  37. src/ether0/fingerprints.bloom +3 -0
  38. src/ether0/model_prompts.py +142 -0
  39. src/ether0/models.py +173 -0
  40. src/ether0/problem_prompts.py +196 -0
  41. src/ether0/py.typed +0 -0
  42. src/ether0/rewards.py +753 -0
  43. src/ether0/rings.bloom +3 -0
  44. src/ether0/utils.py +108 -0
  45. tests/conftest.py +12 -0
  46. tests/test_chat.py +61 -0
  47. tests/test_data.py +158 -0
  48. tests/test_model_prompts.py +424 -0
  49. tests/test_models.py +81 -0
  50. tests/test_rewards.py +489 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ docs/assets/training_info.png filter=lfs diff=lfs merge=lfs -text
+ src/ether0/fingerprints.bloom filter=lfs diff=lfs merge=lfs -text
+ src/ether0/rings.bloom filter=lfs diff=lfs merge=lfs -text
CITATION.cff ADDED
@@ -0,0 +1,66 @@
+ ---
+ cff-version: 1.2.0
+ title: "Training a Scientific Reasoning Model for Chemistry"
+ message: >-
+   If you use this software, please cite it using the
+   metadata from this file.
+ authors:
+   - given-names: Siddharth M.
+     family-names: Narayanan
+   - given-names: James D.
+     family-names: Braza
+   - given-names: Ryan-Rhys
+     family-names: Griffiths
+   - given-names: Albert
+     family-names: Bou
+   - given-names: Geemi P.
+     family-names: Wellawatte
+   - given-names: Mayk
+     family-names: Caldas Ramos
+   - given-names: Ludovico
+     family-names: Mitchener
+   - given-names: Samuel G.
+     family-names: Rodriques
+   - given-names: Andrew D.
+     family-names: White
+ identifiers:
+   - type: doi
+     value: 10.48550/arXiv.2506.17238
+     description: ArXiv DOI
+   - type: url
+     value: https://arxiv.org/abs/2506.17238
+     description: ArXiv abstract
+ repository-code: https://github.com/Future-House/ether0
+ keywords:
+   - Artificial Intelligence
+   - Chemistry
+   - Computation and Language
+   - Machine Learning
+   - Reasoning Model
+ license: Apache-2.0
+ preferred-citation:
+   authors:
+     - given-names: Siddharth M.
+       family-names: Narayanan
+     - given-names: James D.
+       family-names: Braza
+     - given-names: Ryan-Rhys
+       family-names: Griffiths
+     - given-names: Albert
+       family-names: Bou
+     - given-names: Geemi P.
+       family-names: Wellawatte
+     - given-names: Mayk
+       family-names: Caldas Ramos
+     - given-names: Ludovico
+       family-names: Mitchener
+     - given-names: Samuel G.
+       family-names: Rodriques
+     - given-names: Andrew D.
+       family-names: White
+   date-published: 2025-06-04
+   doi: 10.48550/arXiv.2506.17238
+   journal: preprint
+   title: "Training a Scientific Reasoning Model for Chemistry"
+   type: article
+   url: https://arxiv.org/abs/2506.17238
LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2025 FutureHouse
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md CHANGED
@@ -1,16 +1,207 @@
- ---
- title: Ether0 Inference
- emoji: 💬
- colorFrom: yellow
- colorTo: purple
- sdk: gradio
- sdk_version: 5.42.0
- app_file: app.py
- pinned: false
- hf_oauth: true
- hf_oauth_scopes:
- - inference-api
- license: mit
- ---
-
- An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
+ # ether0 Reward Model
+
+ [![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/Future-House/ether0)
+ [![arXiv](https://img.shields.io/badge/arXiv-2506.17238-b31b1b.svg)](https://arxiv.org/abs/2506.17238)
+ [![Project Status: Active](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
+ ![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)
+
+ [![Tests](https://github.com/Future-House/ether0/actions/workflows/lint-test.yaml/badge.svg)](https://github.com/Future-House/ether0/actions)
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+ [![python](https://img.shields.io/badge/python-3.11+-blue?style=flat&logo=python&logoColor=white)](https://www.python.org)
+ [![Model on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/model-on-hf-md-dark.svg)](https://huggingface.co/futurehouse/ether0)
+ [![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-md-dark.svg)](https://huggingface.co/datasets/futurehouse/ether0-benchmark)
+
+ ![ether0 logo](docs/assets/ether0_logo.svg)
+
+ _ether0: a scientific reasoning model, dataset, and reward functions for chemistry._
+
+ This repo contains the reward model for evaluating ether0 and similar models,
+ along with utilities for working with the verifiable rewards in
+ [our benchmark](https://huggingface.co/datasets/futurehouse/ether0-benchmark).
+
+ ## Overview
+
+ ether0 is a reasoning language model post-trained through a loop of:
+
+ 1. Supervised fine-tuning (SFT) on long chain-of-thought reasoning traces,
+    to elicit reasoning from a base model.
+ 2. Reinforcement learning with verifiable rewards (RLVR)
+    to improve reasoning on focused task groups, each at its own pace.
+    The resulting multitask models are referred to as 'specialists'.
+ 3. Rejection sampling to filter the specialists' reasoning
+    for correctness and quality.
+ 4. SFT on the base model again to make a 'generalist' reasoning model.
+ 5. RLVR to recover any lost performance and push further in an all-task setting.
+
+ ![ether0 training info](docs/assets/training_info.png)
+
+ ### Repo Structure
+
+ This repo contains two packages:
+
+ - `ether0`: reward functions, `rdkit` data utilities,
+   dataset generation prompts and data models,
+   and language model training prompts and data models.
+ - `ether0.remotes`: server code for ether0 reward functions that involve
+   exotic packages and/or third-party models.
+
+ > [!NOTE]
+ > This repo does not contain training code,
+ > although open-source repositories like [NeMo-RL](https://github.com/NVIDIA/NeMo-RL)
+ > or [Hugging Face TRL](https://github.com/huggingface/trl)
+ > can perform the SFT and RL phases of training.
+
+ ### Open Weights
+
+ Please see our open-source weights on Hugging Face:
+ <https://huggingface.co/futurehouse/ether0>
+
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ model = AutoModelForCausalLM.from_pretrained("futurehouse/ether0")
+ tokenizer = AutoTokenizer.from_pretrained("futurehouse/ether0")
+ ```
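A minimal generation sketch follows, continuing from the snippet above. The prompt is illustrative and the generation settings are assumptions rather than tuned recommendations; `app.py` later in this commit wraps the same flow in a Gradio UI.

```python
# Continuing from the loaded model/tokenizer above (illustrative prompt and settings)
messages = [{"role": "user", "content": "Propose a molecule that is soluble in water."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
outputs = model.generate(input_ids, max_new_tokens=512)
# Decode only the newly generated tokens
print(tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True))
```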
+
+ ### Open Test Set
+
+ Please see our open-source benchmark (test set) on Hugging Face:
+ <https://huggingface.co/datasets/futurehouse/ether0-benchmark>
+
+ ```python
+ from datasets import load_dataset
+
+ test_ds = load_dataset("futurehouse/ether0-benchmark", split="test")
+ ```
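Each row pairs a chemistry question with a machine-verifiable solution. A quick peek at the two fields used in the Benchmark section below (a minimal sketch; only `problem` and `solution` are relied on here):

```python
row = test_ds[0]
print(row["problem"])   # natural-language chemistry question
print(row["solution"])  # reward-function spec, parsed via RewardFunctionInfo in the Benchmark section
```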
+
+ ## Usage
+
+ ### Installation
+
+ The easiest way to get started is a `pip install` from GitHub:
+
+ ```bash
+ pip install git+https://github.com/Future-House/ether0.git
+ ```
+
+ Or, if you want the full setup, clone the repo and use `uv`:
+
+ ```bash
+ git clone https://github.com/Future-House/ether0.git
+ cd ether0
+ uv sync
+ ```
+
+ ### Reward Functions
+
+ Here is a basic example of how to use the reward functions:
+
+ ```python
+ from ether0.rewards import valid_mol_eval
+
+ # Task: provide a valid completion of this molecule
+ partial_smiles = "O=C(OC1C(OC(=O)C=2C=CC=CC2)C3(O)C(C)(C)CCCC3(C)C4CC=5OC=CC5C(C)C14"
+
+ # Here are two model-proposed SMILES completions
+ invalid_completion_smiles = "CCC"
+ valid_completion_smiles = ")C=6C=CC=CC6"
+
+ # Evaluate the completions
+ assert not valid_mol_eval(invalid_completion_smiles, partial_smiles)
+ assert valid_mol_eval(valid_completion_smiles, partial_smiles)
+ ```
+
+ ### Visualization
+
+ If it helps, you can visualize the molecules:
+
+ ```python
+ from ether0.data import draw_molecule
+
+ # See the reward functions demo above for where these came from
+ partial_smiles = "O=C(OC1C(OC(=O)C=2C=CC=CC2)C3(O)C(C)(C)CCCC3(C)C4CC=5OC=CC5C(C)C14"
+ invalid_completion_smiles = "CCC"
+ valid_completion_smiles = ")C=6C=CC=CC6"
+
+ valid_mol_text = draw_molecule(partial_smiles + valid_completion_smiles)
+ with open("valid_molecule.svg", "w") as f:
+     f.write(valid_mol_text)
+ ```
+
+ The output of `draw_molecule` can also be visualized using `IPython.display`
+ (see the sketch below), or in your terminal via `chafa valid_molecule.svg`
+ ([chafa docs](https://hpjansson.org/chafa/)).
+
+ ![valid molecule](docs/assets/valid_molecule.svg)
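For instance, here is a minimal sketch of rendering the SVG inline in a Jupyter/IPython session, reusing `valid_mol_text` from the snippet above:

```python
from IPython.display import SVG, display

display(SVG(valid_mol_text))  # renders the molecule drawing inline
```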
+
+ ### Benchmark
+
+ Here is a sample baseline of
+ [`ether0-benchmark`](https://huggingface.co/datasets/futurehouse/ether0-benchmark)
+ on `gpt-4o` using [`lmi`](https://github.com/Future-House/ldp/tree/main/packages/lmi).
+ To install `lmi`, please install `ether0` with the `baselines` extra
+ (for example `uv sync --extra baselines`).
+
+ We also need to run our remote rewards server via `ether0-serve`
+ (for more information, see the [`ether0.remotes` docs](packages/remotes/README.md)):
+
+ ```bash
+ ETHER0_REMOTES_API_TOKEN=abc123 ether0-serve
+ ```
+
+ Next, start `ipython` with the relevant environment variables set:
+
+ ```bash
+ ETHER0_REMOTES_API_BASE_URL="http://127.0.0.1:8000" ETHER0_REMOTES_API_TOKEN=abc123 \
+     ipython
+ ```
+
+ Then run the following Python code:
+
+ ```python
+ import itertools
+ import statistics
+ from collections import defaultdict
+
+ from aviary.core import Message
+ from datasets import load_dataset
+ from lmi import LiteLLMModel
+ from tqdm.asyncio import tqdm_asyncio as asyncio
+
+ from ether0.data import get_problem_category
+ from ether0.model_prompts import LOOSE_XML_ANSWER_USER_PROMPT, extract_answer_loose
+ from ether0.models import RewardFunctionInfo
+ from ether0.rewards import EVAL_FUNCTIONS
+
+ # Add an LLM prompt of your making to the dataset
+ test_ds = load_dataset("futurehouse/ether0-benchmark", split="test").map(
+     lambda x: {"prompt": "\n\n".join((LOOSE_XML_ANSWER_USER_PROMPT, x["problem"]))}
+ )
+
+ # Prompt the LLM
+ model = LiteLLMModel(name="gpt-4o")
+ results = await asyncio.gather(
+     *(model.acompletion([Message(content=row["prompt"])]) for row in test_ds),
+     desc="Running evaluation",
+ )
+
+ # Compute rewards
+ per_category_rewards = defaultdict(list)
+ for row, result in zip(test_ds, results, strict=True):
+     # NOTE: you can also use `ether0.rewards.accuracy_reward`,
+     # but we decided to go a bit "lower level" for this demo
+     reward_info = RewardFunctionInfo.model_validate(row["solution"])
+     yhat = extract_answer_loose(result[0].text)
+     reward = EVAL_FUNCTIONS[reward_info.fxn_name](
+         yhat=yhat, y=reward_info.answer_info, test=True
+     )
+     per_category_rewards[get_problem_category(reward_info.problem_type)].append(reward)
+
+ for category, rewards in sorted(per_category_rewards.items()):
+     print(
+         f"In category {category!r} of {len(rewards)} questions,"
+         f" average reward was {statistics.mean(rewards):.3f}."
+     )
+ accuracy = statistics.mean(itertools.chain.from_iterable(per_category_rewards.values()))
+ print(f"Cumulative average reward across {len(test_ds)} questions was {accuracy:.3f}.")
+ ```
app.py ADDED
@@ -0,0 +1,78 @@
+ import os
+ import gradio as gr
+ import spaces
+ import torch
+ import logging
+ import time
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from transformers.utils import logging as hf_logging
+
+ logging.basicConfig(
+     filename="/tmp/app.log",
+     level=logging.DEBUG,
+     format="%(asctime)s %(levelname)s: %(message)s",
+ )
+
+ logging.info("Starting app.py logging")
+ hf_logging.set_verbosity_debug()
+ hf_logging.set_verbosity_info()
+ hf_logging.enable_default_handler()
+ hf_logging.enable_explicit_format()
+ hf_logging.add_handler(logging.FileHandler("/tmp/transformers.log"))
+
+
+ model_id = "futurehouse/ether0"
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     device_map="auto",
+     torch_dtype=torch.float16,
+ )
+
+ @spaces.GPU
+ def chat_fn(prompt, max_tokens=512):
+     t0 = time.time()
+     max_tokens = min(int(max_tokens), 32_000)
+
+     try:
+         messages = [{"role": "user", "content": prompt}]
+         chat_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+         inputs = tokenizer(chat_prompt, return_tensors="pt").to(model.device)
+         t1 = time.time()
+         logging.info(f"🧠 Tokenization complete in {t1 - t0:.2f}s")
+
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens=max_tokens,
+             do_sample=True,
+             temperature=0.1,
+             pad_token_id=tokenizer.eos_token_id,
+         )
+         t2 = time.time()
+         logging.info(f"⚡️ Generation complete in {t2 - t1:.2f}s (max_tokens={max_tokens})")
+
+         generated_text = tokenizer.decode(
+             outputs[0][inputs["input_ids"].shape[1]:],
+             skip_special_tokens=True,
+         )
+         t3 = time.time()
+         logging.info(f"🔓 Decoding complete in {t3 - t2:.2f}s (output length: {len(generated_text)})")
+
+         return generated_text
+
+     except Exception:
+         logging.exception("❌ Exception during generation")
+         return "⚠️ Generation failed"
+
+ gr.Interface(
+     fn=chat_fn,
+     inputs=[
+         gr.Textbox(label="prompt"),
+         gr.Number(label="max_tokens", value=512, precision=0),
+     ],
+     outputs="text",
+     title="Ether0",
+ ).launch(ssr_mode=False)
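Once deployed as a Space, the interface above can also be called programmatically. A hedged sketch using `gradio_client` follows; the Space id below is a placeholder, not something defined in this commit:

```python
from gradio_client import Client

client = Client("your-username/ether0-inference")  # hypothetical Space id
answer = client.predict(
    "Propose a molecule that is soluble in water.",  # prompt
    512,  # max_tokens
    api_name="/predict",  # default endpoint name for a single gr.Interface
)
print(answer)
```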
docs/adding_tokens.ipynb ADDED
@@ -0,0 +1,129 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "072120f9",
6
+ "metadata": {},
7
+ "source": [
8
+ "If you would like to modify a base model to add our custom reasoning tokens,\n",
9
+ "here's how to do it.\n",
10
+ "\n",
11
+ "Firstly, please install the `add-tokens` extra via\n",
12
+ "`pip install ether0[add-tokens]` for the `transformers` package.\n",
13
+ "\n",
14
+ "Then, configure the following inputs."
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": null,
20
+ "id": "a2fb6296",
21
+ "metadata": {},
22
+ "outputs": [],
23
+ "source": [
24
+ "# Model name/revisions for Hugging Face Hub\n",
25
+ "input_model_name = \"mistralai/Mistral-Small-24B-Instruct-2501\"\n",
26
+ "input_model_revision: str | None = None\n",
27
+ "output_model_name = \"FILL ME IN\"\n",
28
+ "output_model_revision: str | None = None\n",
29
+ "output_model_is_private = True\n",
30
+ "tokenizer_only = False # Set True to only update the tokenizer\n",
31
+ "push_to_hf = False # Set True to push to Hugging Face Hub\n",
32
+ "\n",
33
+ "# Chat template file that uses the new tokens\n",
34
+ "chat_template_path = \"updated_mistral_chat_template.jinja\""
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": null,
40
+ "id": "99927d80",
41
+ "metadata": {},
42
+ "outputs": [
43
+ {
44
+ "data": {
45
+ "application/vnd.jupyter.widget-view+json": {
46
+ "model_id": "8e15d3fb5e864e1286cf94fc588e504d",
47
+ "version_major": 2,
48
+ "version_minor": 0
49
+ },
50
+ "text/plain": [
51
+ "Loading checkpoint shards: 0%| | 0/10 [00:00<?, ?it/s]"
52
+ ]
53
+ },
54
+ "metadata": {},
55
+ "output_type": "display_data"
56
+ },
57
+ {
58
+ "name": "stderr",
59
+ "output_type": "stream",
60
+ "text": [
61
+ "The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`\n",
62
+ "The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`\n"
63
+ ]
64
+ }
65
+ ],
66
+ "source": [
67
+ "from pathlib import Path\n",
68
+ "\n",
69
+ "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
70
+ "\n",
71
+ "from ether0.model_prompts import ANSWER_END, ANSWER_START, THINK_END, THINK_START\n",
72
+ "\n",
73
+ "REASONING_TOKENS_TO_ADD = [\n",
74
+ " THINK_START,\n",
75
+ " THINK_END,\n",
76
+ " ANSWER_START,\n",
77
+ " ANSWER_END,\n",
78
+ "]\n",
79
+ "\n",
80
+ "tokenizer = AutoTokenizer.from_pretrained(\n",
81
+ " input_model_name, revision=input_model_revision\n",
82
+ ")\n",
83
+ "# NOTE: reasoning tokens are normal (not special) tokens so they aren't\n",
84
+ "# removed when passing skip_special_tokens=True to a tokenizer\n",
85
+ "tokenizer.add_tokens(REASONING_TOKENS_TO_ADD)\n",
86
+ "tokenizer.chat_template = Path(chat_template_path).read_text(encoding=\"utf-8\")\n",
87
+ "if push_to_hf:\n",
88
+ " tokenizer.push_to_hub(\n",
89
+ " output_model_name,\n",
90
+ " revision=output_model_revision,\n",
91
+ " private=output_model_is_private,\n",
92
+ " )\n",
93
+ "\n",
94
+ "if not tokenizer_only:\n",
95
+ " model = AutoModelForCausalLM.from_pretrained(\n",
96
+ " input_model_name, revision=input_model_revision\n",
97
+ " )\n",
98
+ " # SEE: https://www.thonking.ai/p/what-shapes-do-matrix-multiplications\n",
99
+ " model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=64)\n",
100
+ " if push_to_hf:\n",
101
+ " model.push_to_hub(\n",
102
+ " output_model_name,\n",
103
+ " revision=output_model_revision,\n",
104
+ " private=output_model_is_private,\n",
105
+ " )"
106
+ ]
107
+ }
108
+ ],
109
+ "metadata": {
110
+ "kernelspec": {
111
+ "display_name": ".venv",
112
+ "language": "python",
113
+ "name": "python3"
114
+ },
115
+ "language_info": {
116
+ "codemirror_mode": {
117
+ "name": "ipython",
118
+ "version": 3
119
+ },
120
+ "file_extension": ".py",
121
+ "mimetype": "text/x-python",
122
+ "name": "python",
123
+ "nbconvert_exporter": "python",
124
+ "pygments_lexer": "ipython3"
125
+ }
126
+ },
127
+ "nbformat": 4,
128
+ "nbformat_minor": 5
129
+ }
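After resizing, it can be worth confirming that each new reasoning token maps to a single, known token id. A minimal sketch, continuing from the notebook's `tokenizer` and `REASONING_TOKENS_TO_ADD`:

```python
ids = tokenizer.convert_tokens_to_ids(REASONING_TOKENS_TO_ADD)
assert tokenizer.unk_token_id not in ids, "a reasoning token was not added"
assert len(set(ids)) == len(REASONING_TOKENS_TO_ADD), "duplicate token ids"
print(dict(zip(REASONING_TOKENS_TO_ADD, ids)))
```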
docs/assets/ether0_logo.svg ADDED
docs/assets/training_info.png ADDED

Git LFS Details

  • SHA256: 394653640102293eb6a10e4aac2c14fa58aea61077f3203211cf0226b38d84fe
  • Pointer size: 131 Bytes
  • Size of remote file: 589 kB
docs/assets/valid_molecule.svg ADDED
docs/updated_mistral_chat_template.jinja ADDED
@@ -0,0 +1,22 @@
+ {%- set default_system_message = "You are a scientific reasoning AI assistant." %}
+ {{- bos_token }}
+ {%- if messages[0]['role'] == 'system' %}
+     {%- set system_message = messages[0]['content'] %}
+     {%- set loop_messages = messages[1:] %}
+ {%- else %}
+     {%- set system_message = default_system_message %}
+     {%- set loop_messages = messages %}
+ {%- endif %}
+ {{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
+
+ {%- for message in loop_messages %}
+     {%- if message['role'] == 'user' %}
+         {{- '[INST]' + message['content'] + '[/INST]' }}
+     {%- elif message['role'] == 'system' %}
+         {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
+     {%- elif message['role'] == 'assistant' %}
+         {{- message['content'] + eos_token }}
+     {%- else %}
+         {{- raise_exception("Only user, system and assistant roles are supported!") }}
+     {%- endif %}
+ {%- endfor %}
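To see how this template renders, here is a minimal sketch, assuming a tokenizer whose chat template has been set to this file (for example via the `adding_tokens.ipynb` notebook above):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("futurehouse/ether0")  # assumed to ship this template
messages = [
    {"role": "system", "content": "You are a scientific reasoning AI assistant."},
    {"role": "user", "content": "Propose a valid completion of this SMILES string: CCO"},
]
# Produces the BOS token, then [SYSTEM_PROMPT]...[/SYSTEM_PROMPT][INST]...[/INST], per the template above
print(tokenizer.apply_chat_template(messages, tokenize=False))
```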
gradio_test.py ADDED
@@ -0,0 +1,12 @@
+ import gradio as gr
+
+ def greet(name, intensity):
+     return "Hello, " + name + "!" * int(intensity)
+
+ demo = gr.Interface(
+     fn=greet,
+     inputs=["text", "slider"],
+     outputs=["text"],
+ )
+
+ demo.launch()
packages/remotes/.DS_Store ADDED
Binary file (6.15 kB)
 
packages/remotes/.gitignore ADDED
@@ -0,0 +1,2 @@
+ # Ignore any downloaded PyTorch model (e.g. a MolTrans model)
+ src/ether0/*.pt
packages/remotes/LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2025 FutureHouse
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
packages/remotes/README.md ADDED
@@ -0,0 +1,38 @@
+ # ether0.remotes
+
+ Server code for ether0 reward functions.
+
+ ## Molecular Transformer (MolTrans) Model
+
+ To run the `/translate` endpoint, you need a pre-trained MolTrans PyTorch model.
+ It can be acquired from Future House's Google Drive via the following command:
+
+ ```bash
+ curl --location --output src/ether0/USPTO480k_model_step_400000.pt \
+     "https://drive.usercontent.google.com/download?id=1Rjd3wXg2oLeCpNUofFRvVvQoOcgWd6vf&export=download&confirm=t"
+ ```
+
+ Or, more manually:
+
+ 1. Go to [this notebook][1].
+ 2. Download `USPTO480k_model_step_400000.pt` from the Google Drive file
+    referenced by the notebook's `trained_model_url` variable:
+    <https://drive.google.com/uc?id=1ywJCJHunoPTB5wr6KdZ8aLv7tMFMBHNy>
+ 3. Set the environment variable `ETHER0_REMOTES_MOLTRANS_MODEL_PATH`
+    to the downloaded PyTorch model's location,
+    or place the model in the default checked location, the `ether0` source folder (`src/ether0`).
+
+ [1]: https://github.com/schwallergroup/ai4chem_course/blob/main/notebooks/07%20-%20Reaction%20Prediction/template_free.ipynb
+
+ ## Serving
+
+ To run the server:
+
+ 1. `pip install` with the `serve` extra: `pip install ether0.remotes[serve]`
+ 2. Then run the following command:
+
+ ```bash
+ ETHER0_REMOTES_API_TOKEN="abc123" \
+     ETHER0_REMOTES_MOLTRANS_MODEL_PATH="/path/to/USPTO480k_model_step_400000.pt" \
+     ether0-serve
+ ```
packages/remotes/pyproject.toml ADDED
@@ -0,0 +1,59 @@
+ [build-system]
+ build-backend = "setuptools.build_meta"
+ requires = ["setuptools>=64", "setuptools_scm>=8"]
+
+ [dependency-groups]
+ dev = ["ether0.remotes[dev]"]
+
+ [project]
+ authors = [
+     {email = "[email protected]", name = "FutureHouse technical staff"},
+ ]
+ classifiers = [
+     "Intended Audience :: Developers",
+     "License :: OSI Approved :: Apache Software License",
+     "Operating System :: POSIX",
+     "Programming Language :: Python :: 3 :: Only",
+     "Programming Language :: Python :: 3.11",
+     "Programming Language :: Python :: 3.12",
+     "Programming Language :: Python :: 3.13",
+     "Programming Language :: Python",
+     "Topic :: Scientific/Engineering :: Artificial Intelligence",
+     "Topic :: Scientific/Engineering :: Chemistry",
+ ]
+ dependencies = [
+     "OpenNMT-py==2.3.0",  # Match MolecularTransformer version
+     "fastapi",
+     "molbloom>=2.3.4",  # For nested zinc20 catalog
+     "molsol>=0.0.3",
+     "numpy>=1.20",  # Pin for numpy typing
+     "pydantic>=2",  # Pin to keep recent
+     "rdkit",
+     "torch<2.6",  # Downpin for weights_only introduction breaking model_builder's load_test_model
+ ]
+ description = "Server code for remotes of ether0"
+ dynamic = ["version"]
+ license = {file = "LICENSE"}
+ name = "ether0.remotes"
+ readme = "README.md"
+ requires-python = ">=3.11"
+
+ [project.optional-dependencies]
+ dev = [
+     "ether0",
+     "ether0.remotes[serve]",
+     "tensorboard>=2.19",  # Indirect dependency we pin to keep recent
+ ]
+ serve = ["uvicorn"]
+
+ [project.scripts]
+ ether0-serve = "ether0.server:main"
+
+ [tool.ruff]
+ extend = "../../pyproject.toml"
+
+ [tool.setuptools.packages.find]
+ where = ["src"]
+
+ [tool.setuptools_scm]
+ root = "../.."
packages/remotes/src/ether0.remotes.egg-info/PKG-INFO ADDED
@@ -0,0 +1,274 @@
1
+ Metadata-Version: 2.4
2
+ Name: ether0.remotes
3
+ Version: 0.1.dev19+g97db23388
4
+ Summary: Server code for remotes of ether0
5
+ Author-email: FutureHouse technical staff <[email protected]>
6
+ License: Apache License
7
+ Version 2.0, January 2004
8
+ http://www.apache.org/licenses/
9
+
10
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
11
+
12
+ 1. Definitions.
13
+
14
+ "License" shall mean the terms and conditions for use, reproduction,
15
+ and distribution as defined by Sections 1 through 9 of this document.
16
+
17
+ "Licensor" shall mean the copyright owner or entity authorized by
18
+ the copyright owner that is granting the License.
19
+
20
+ "Legal Entity" shall mean the union of the acting entity and all
21
+ other entities that control, are controlled by, or are under common
22
+ control with that entity. For the purposes of this definition,
23
+ "control" means (i) the power, direct or indirect, to cause the
24
+ direction or management of such entity, whether by contract or
25
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
26
+ outstanding shares, or (iii) beneficial ownership of such entity.
27
+
28
+ "You" (or "Your") shall mean an individual or Legal Entity
29
+ exercising permissions granted by this License.
30
+
31
+ "Source" form shall mean the preferred form for making modifications,
32
+ including but not limited to software source code, documentation
33
+ source, and configuration files.
34
+
35
+ "Object" form shall mean any form resulting from mechanical
36
+ transformation or translation of a Source form, including but
37
+ not limited to compiled object code, generated documentation,
38
+ and conversions to other media types.
39
+
40
+ "Work" shall mean the work of authorship, whether in Source or
41
+ Object form, made available under the License, as indicated by a
42
+ copyright notice that is included in or attached to the work
43
+ (an example is provided in the Appendix below).
44
+
45
+ "Derivative Works" shall mean any work, whether in Source or Object
46
+ form, that is based on (or derived from) the Work and for which the
47
+ editorial revisions, annotations, elaborations, or other modifications
48
+ represent, as a whole, an original work of authorship. For the purposes
49
+ of this License, Derivative Works shall not include works that remain
50
+ separable from, or merely link (or bind by name) to the interfaces of,
51
+ the Work and Derivative Works thereof.
52
+
53
+ "Contribution" shall mean any work of authorship, including
54
+ the original version of the Work and any modifications or additions
55
+ to that Work or Derivative Works thereof, that is intentionally
56
+ submitted to Licensor for inclusion in the Work by the copyright owner
57
+ or by an individual or Legal Entity authorized to submit on behalf of
58
+ the copyright owner. For the purposes of this definition, "submitted"
59
+ means any form of electronic, verbal, or written communication sent
60
+ to the Licensor or its representatives, including but not limited to
61
+ communication on electronic mailing lists, source code control systems,
62
+ and issue tracking systems that are managed by, or on behalf of, the
63
+ Licensor for the purpose of discussing and improving the Work, but
64
+ excluding communication that is conspicuously marked or otherwise
65
+ designated in writing by the copyright owner as "Not a Contribution."
66
+
67
+ "Contributor" shall mean Licensor and any individual or Legal Entity
68
+ on behalf of whom a Contribution has been received by Licensor and
69
+ subsequently incorporated within the Work.
70
+
71
+ 2. Grant of Copyright License. Subject to the terms and conditions of
72
+ this License, each Contributor hereby grants to You a perpetual,
73
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
74
+ copyright license to reproduce, prepare Derivative Works of,
75
+ publicly display, publicly perform, sublicense, and distribute the
76
+ Work and such Derivative Works in Source or Object form.
77
+
78
+ 3. Grant of Patent License. Subject to the terms and conditions of
79
+ this License, each Contributor hereby grants to You a perpetual,
80
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
81
+ (except as stated in this section) patent license to make, have made,
82
+ use, offer to sell, sell, import, and otherwise transfer the Work,
83
+ where such license applies only to those patent claims licensable
84
+ by such Contributor that are necessarily infringed by their
85
+ Contribution(s) alone or by combination of their Contribution(s)
86
+ with the Work to which such Contribution(s) was submitted. If You
87
+ institute patent litigation against any entity (including a
88
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
89
+ or a Contribution incorporated within the Work constitutes direct
90
+ or contributory patent infringement, then any patent licenses
91
+ granted to You under this License for that Work shall terminate
92
+ as of the date such litigation is filed.
93
+
94
+ 4. Redistribution. You may reproduce and distribute copies of the
95
+ Work or Derivative Works thereof in any medium, with or without
96
+ modifications, and in Source or Object form, provided that You
97
+ meet the following conditions:
98
+
99
+ (a) You must give any other recipients of the Work or
100
+ Derivative Works a copy of this License; and
101
+
102
+ (b) You must cause any modified files to carry prominent notices
103
+ stating that You changed the files; and
104
+
105
+ (c) You must retain, in the Source form of any Derivative Works
106
+ that You distribute, all copyright, patent, trademark, and
107
+ attribution notices from the Source form of the Work,
108
+ excluding those notices that do not pertain to any part of
109
+ the Derivative Works; and
110
+
111
+ (d) If the Work includes a "NOTICE" text file as part of its
112
+ distribution, then any Derivative Works that You distribute must
113
+ include a readable copy of the attribution notices contained
114
+ within such NOTICE file, excluding those notices that do not
115
+ pertain to any part of the Derivative Works, in at least one
116
+ of the following places: within a NOTICE text file distributed
117
+ as part of the Derivative Works; within the Source form or
118
+ documentation, if provided along with the Derivative Works; or,
119
+ within a display generated by the Derivative Works, if and
120
+ wherever such third-party notices normally appear. The contents
121
+ of the NOTICE file are for informational purposes only and
122
+ do not modify the License. You may add Your own attribution
123
+ notices within Derivative Works that You distribute, alongside
124
+ or as an addendum to the NOTICE text from the Work, provided
125
+ that such additional attribution notices cannot be construed
126
+ as modifying the License.
127
+
128
+ You may add Your own copyright statement to Your modifications and
129
+ may provide additional or different license terms and conditions
130
+ for use, reproduction, or distribution of Your modifications, or
131
+ for any such Derivative Works as a whole, provided Your use,
132
+ reproduction, and distribution of the Work otherwise complies with
133
+ the conditions stated in this License.
134
+
135
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
136
+ any Contribution intentionally submitted for inclusion in the Work
137
+ by You to the Licensor shall be under the terms and conditions of
138
+ this License, without any additional terms or conditions.
139
+ Notwithstanding the above, nothing herein shall supersede or modify
140
+ the terms of any separate license agreement you may have executed
141
+ with Licensor regarding such Contributions.
142
+
143
+ 6. Trademarks. This License does not grant permission to use the trade
144
+ names, trademarks, service marks, or product names of the Licensor,
145
+ except as required for reasonable and customary use in describing the
146
+ origin of the Work and reproducing the content of the NOTICE file.
147
+
148
+ 7. Disclaimer of Warranty. Unless required by applicable law or
149
+ agreed to in writing, Licensor provides the Work (and each
150
+ Contributor provides its Contributions) on an "AS IS" BASIS,
151
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
152
+ implied, including, without limitation, any warranties or conditions
153
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
154
+ PARTICULAR PURPOSE. You are solely responsible for determining the
155
+ appropriateness of using or redistributing the Work and assume any
156
+ risks associated with Your exercise of permissions under this License.
157
+
158
+ 8. Limitation of Liability. In no event and under no legal theory,
159
+ whether in tort (including negligence), contract, or otherwise,
160
+ unless required by applicable law (such as deliberate and grossly
161
+ negligent acts) or agreed to in writing, shall any Contributor be
162
+ liable to You for damages, including any direct, indirect, special,
163
+ incidental, or consequential damages of any character arising as a
164
+ result of this License or out of the use or inability to use the
165
+ Work (including but not limited to damages for loss of goodwill,
166
+ work stoppage, computer failure or malfunction, or any and all
167
+ other commercial damages or losses), even if such Contributor
168
+ has been advised of the possibility of such damages.
169
+
170
+ 9. Accepting Warranty or Additional Liability. While redistributing
171
+ the Work or Derivative Works thereof, You may choose to offer,
172
+ and charge a fee for, acceptance of support, warranty, indemnity,
173
+ or other liability obligations and/or rights consistent with this
174
+ License. However, in accepting such obligations, You may act only
175
+ on Your own behalf and on Your sole responsibility, not on behalf
176
+ of any other Contributor, and only if You agree to indemnify,
177
+ defend, and hold each Contributor harmless for any liability
178
+ incurred by, or claims asserted against, such Contributor by reason
179
+ of your accepting any such warranty or additional liability.
180
+
181
+ END OF TERMS AND CONDITIONS
182
+
183
+ APPENDIX: How to apply the Apache License to your work.
184
+
185
+ To apply the Apache License to your work, attach the following
186
+ boilerplate notice, with the fields enclosed by brackets "[]"
187
+ replaced with your own identifying information. (Don't include
188
+ the brackets!) The text should be enclosed in the appropriate
189
+ comment syntax for the file format. We also recommend that a
190
+ file or class name and description of purpose be included on the
191
+ same "printed page" as the copyright notice for easier
192
+ identification within third-party archives.
193
+
194
+ Copyright 2025 FutureHouse
195
+
196
+ Licensed under the Apache License, Version 2.0 (the "License");
197
+ you may not use this file except in compliance with the License.
198
+ You may obtain a copy of the License at
199
+
200
+ http://www.apache.org/licenses/LICENSE-2.0
201
+
202
+ Unless required by applicable law or agreed to in writing, software
203
+ distributed under the License is distributed on an "AS IS" BASIS,
204
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
205
+ See the License for the specific language governing permissions and
206
+ limitations under the License.
207
+
208
+ Classifier: Intended Audience :: Developers
209
+ Classifier: License :: OSI Approved :: Apache Software License
210
+ Classifier: Operating System :: POSIX
211
+ Classifier: Programming Language :: Python :: 3 :: Only
212
+ Classifier: Programming Language :: Python :: 3.11
213
+ Classifier: Programming Language :: Python :: 3.12
214
+ Classifier: Programming Language :: Python :: 3.13
215
+ Classifier: Programming Language :: Python
216
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
217
+ Classifier: Topic :: Scientific/Engineering :: Chemistry
218
+ Requires-Python: >=3.11
219
+ Description-Content-Type: text/markdown
220
+ License-File: LICENSE
221
+ Requires-Dist: OpenNMT-py==2.3.0
222
+ Requires-Dist: fastapi
223
+ Requires-Dist: molbloom>=2.3.4
224
+ Requires-Dist: molsol>=0.0.3
225
+ Requires-Dist: numpy>=1.20
226
+ Requires-Dist: pydantic>=2
227
+ Requires-Dist: rdkit
228
+ Requires-Dist: torch<2.6
229
+ Provides-Extra: dev
230
+ Requires-Dist: ether0; extra == "dev"
231
+ Requires-Dist: ether0.remotes[serve]; extra == "dev"
232
+ Requires-Dist: tensorboard>=2.19; extra == "dev"
233
+ Provides-Extra: serve
234
+ Requires-Dist: uvicorn; extra == "serve"
235
+ Dynamic: license-file
236
+
237
+ # ether0.remotes
238
+
239
+ Server code for ether0 reward functions.
240
+
241
+ ## Molecular Transformer (MolTrans) Model
242
+
243
+ To run the `/translate` endpoint, you need a pre-trained MolTrans PyTorch model.
244
+ This can be downloaded from FutureHouse's Google Drive via the following command:
245
+
246
+ ```bash
247
+ curl --location --output src/ether0/USPTO480k_model_step_400000.pt \
248
+ "https://drive.usercontent.google.com/download?id=1Rjd3wXg2oLeCpNUofFRvVvQoOcgWd6vf&export=download&confirm=t"
249
+ ```
250
+
251
+ Or more manually:
252
+
253
+ 1. Go to [this notebook][1]
254
+ 2. Download the `USPTO480k_model_step_400000.pt`
255
+    from the Google Drive file referenced by the `trained_model_url` variable:
256
+ <https://drive.google.com/uc?id=1ywJCJHunoPTB5wr6KdZ8aLv7tMFMBHNy>
257
+ 3. Set the environment variable `ETHER0_REMOTES_MOLTRANS_MODEL_PATH`
258
+ to the downloaded PyTorch model's location,
259
+    or place the model in the `ether0` source folder (`src/ether0`), the default location checked
+    (a minimal sketch of the environment-variable option follows below).
260
+
261
+ [1]: https://github.com/schwallergroup/ai4chem_course/blob/main/notebooks/07%20-%20Reaction%20Prediction/template_free.ipynb
262
+
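+ For reference, here is a minimal sketch of step 3, assuming the checkpoint was downloaded
+ to `~/models` (an illustrative path, adjust it to wherever you saved the file):
+
+ ```bash
+ # Point the server at the downloaded checkpoint (illustrative path).
+ export ETHER0_REMOTES_MOLTRANS_MODEL_PATH="$HOME/models/USPTO480k_model_step_400000.pt"
+
+ # Sanity-check that the file exists before starting the server.
+ test -f "$ETHER0_REMOTES_MOLTRANS_MODEL_PATH" && echo "model found"
+ ```
+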
263
+ ## Serving
264
+
265
+ To run the server:
266
+
267
+ 1. `pip install` with the `serve` extra: `pip install ether0.remotes[serve]`
268
+ 2. Then run the following command:
269
+
270
+ ```bash
271
+ ETHER0_REMOTES_API_TOKEN="abc123" \
272
+ ETHER0_REMOTES_MOLTRANS_MODEL_PATH="/path/to/USPTO480k_model_step_400000.pt" \
273
+ ether0-serve
274
+ ```
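+
+ Once the server is up (uvicorn defaults to `127.0.0.1:8000`), requests must carry the bearer
+ token configured above. A minimal usage sketch, reusing the `abc123` token from the example
+ command; the `/translate` call additionally requires the MolTrans model to be configured:
+
+ ```bash
+ # Check whether aspirin is purchasable.
+ curl -X POST "http://127.0.0.1:8000/is_purchasable" \
+   -H "Authorization: Bearer abc123" \
+   -H "Content-Type: application/json" \
+   -d '{"smiles": "CC(=O)OC1=CC=CC=C1C(=O)O"}'
+
+ # Predict aqueous solubility (mean plus aleatoric and epistemic uncertainty).
+ curl -X POST "http://127.0.0.1:8000/compute_solubility" \
+   -H "Authorization: Bearer abc123" \
+   -H "Content-Type: application/json" \
+   -d '{"smiles": "CCO"}'
+
+ # Predict the product of a reaction SMILES (two '>' characters, no spaces).
+ curl -X POST "http://127.0.0.1:8000/translate" \
+   -H "Authorization: Bearer abc123" \
+   -H "Content-Type: application/json" \
+   -d '{"reaction": "CC=O.O=C1CCC1Cl>[Mg].CCOCC>"}'
+ ```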
packages/remotes/src/ether0.remotes.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .gitignore
2
+ LICENSE
3
+ README.md
4
+ pyproject.toml
5
+ src/ether0/server.py
6
+ src/ether0.remotes.egg-info/PKG-INFO
7
+ src/ether0.remotes.egg-info/SOURCES.txt
8
+ src/ether0.remotes.egg-info/dependency_links.txt
9
+ src/ether0.remotes.egg-info/entry_points.txt
10
+ src/ether0.remotes.egg-info/requires.txt
11
+ src/ether0.remotes.egg-info/top_level.txt
12
+ tests/conftest.py
13
+ tests/test_clients.py
14
+ tests/test_rewards.py
packages/remotes/src/ether0.remotes.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
packages/remotes/src/ether0.remotes.egg-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [console_scripts]
2
+ ether0-serve = ether0.server:main
packages/remotes/src/ether0.remotes.egg-info/requires.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OpenNMT-py==2.3.0
2
+ fastapi
3
+ molbloom>=2.3.4
4
+ molsol>=0.0.3
5
+ numpy>=1.20
6
+ pydantic>=2
7
+ rdkit
8
+ torch<2.6
9
+
10
+ [dev]
11
+ ether0
12
+ ether0.remotes[serve]
13
+ tensorboard>=2.19
14
+
15
+ [serve]
16
+ uvicorn
packages/remotes/src/ether0.remotes.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ether0
packages/remotes/src/ether0/server.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import re
4
+ import secrets
5
+ import tempfile
6
+ import uuid
7
+ from collections import defaultdict
8
+ from pathlib import Path
9
+ from typing import ClassVar, Literal
10
+
11
+ import numpy as np
12
+ import numpy.typing as npt
13
+ from fastapi import Depends, FastAPI, HTTPException, status
14
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
15
+ from molbloom import buy
16
+ from molsol import KDESol
17
+ from onmt import opts
18
+ from onmt.translate.translator import build_translator
19
+ from onmt.utils.logging import init_logger
20
+ from onmt.utils.misc import split_corpus
21
+ from onmt.utils.parse import ArgumentParser
22
+ from pydantic import BaseModel
23
+ from rdkit import Chem
24
+
25
+ ETHER0_DIR = Path(__file__).parent
26
+
27
+ auth_scheme = HTTPBearer()
28
+
29
+
30
+ def validate_token(
31
+ credentials: HTTPAuthorizationCredentials = Depends(auth_scheme), # noqa: B008
32
+ ) -> str:
33
+ # NOTE: don't use os.environ.get() to avoid possible empty string matches, and
34
+ # to have clearer server failures if the ETHER0_REMOTES_API_TOKEN env var isn't present
35
+ if not secrets.compare_digest(
36
+ credentials.credentials, os.environ["ETHER0_REMOTES_API_TOKEN"]
37
+ ):
38
+ raise HTTPException(
39
+ status_code=status.HTTP_401_UNAUTHORIZED,
40
+ detail="Incorrect bearer token",
41
+ headers={"WWW-Authenticate": "Bearer"},
42
+ )
43
+ return credentials.credentials
44
+
45
+
46
+ app = FastAPI(title="ether0 remotes server", dependencies=[Depends(validate_token)])
47
+
48
+
49
+ class MolecularTransformer:
50
+ """Uses code from https://doi.org/10.1021/acscentsci.9b00576."""
51
+
52
+ DEFAULT_MOLTRANS_MODEL_PATH: ClassVar[Path] = (
53
+ ETHER0_DIR / "USPTO480k_model_step_400000.pt"
54
+ )
55
+
56
+ def __init__(self):
57
+ # Use `or None` to deny setting empty string to the environment variable
58
+ os_environ_model_path = (
59
+ os.environ.get("ETHER0_REMOTES_MOLTRANS_MODEL_PATH") or None
60
+ )
61
+ self.model_path = os_environ_model_path or str(self.DEFAULT_MOLTRANS_MODEL_PATH)
62
+ if not Path(self.model_path).exists():
63
+ raise FileNotFoundError(
64
+ f"MolTrans model not found"
65
+ f"{f', did you misconfigure the path {os_environ_model_path}?' if os_environ_model_path else '.'}" # noqa: E501
66
+ " Please properly configure the environment variable"
67
+ " 'ETHER0_REMOTES_MOLTRANS_MODEL_PATH',"
68
+ f" or place the model at the default checked path {self.DEFAULT_MOLTRANS_MODEL_PATH}."
69
+ )
70
+
71
+ @staticmethod
72
+ def translate(opt: argparse.Namespace) -> None:
73
+ ArgumentParser.validate_translate_opts(opt)
74
+ logger = init_logger(opt.log_file)
75
+
76
+ translator = build_translator(opt, logger=logger, report_score=True)
77
+ src_shards = split_corpus(opt.src, opt.shard_size)
78
+ tgt_shards = split_corpus(opt.tgt, opt.shard_size)
79
+ features_shards = []
80
+ features_names = []
81
+ for feat_name, feat_path in opt.src_feats.items():
82
+ features_shards.append(split_corpus(feat_path, opt.shard_size))
83
+ features_names.append(feat_name)
84
+ shard_pairs = zip(src_shards, tgt_shards, *features_shards) # noqa: B905
85
+
86
+ for (src_shard, tgt_shard, *features_shard) in shard_pairs:
87
+ features_shard_ = defaultdict(list)
88
+ for j, x in enumerate(features_shard):
89
+ features_shard_[features_names[j]] = x
90
+ translator.translate(
91
+ src=src_shard,
92
+ src_feats=features_shard_,
93
+ tgt=tgt_shard,
94
+ batch_size=opt.batch_size,
95
+ batch_type=opt.batch_type,
96
+ attn_debug=opt.attn_debug,
97
+ align_debug=opt.align_debug,
98
+ )
99
+
100
+ @staticmethod
101
+ def smiles_tokenizer(smiles: str) -> str:
102
+ smiles_regex = re.compile(
103
+ r"(\%\([0-9]{3}\)|\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\||\(|\)|\.|=|#|-|\+|\\|\/|:|~|@|\?|>>?|\*|\$|\%[0-9]{2}|[0-9])"
104
+ )
105
+ tokens = list(smiles_regex.findall(smiles))
106
+ return " ".join(tokens)
107
+
108
+ @staticmethod
109
+ def canonicalize_smiles(smiles: str) -> str:
110
+ # Try to use canonical SMILES because the original USPTO data is distributed in canonical form.
111
+ # If it fails, the except block below identifies and reports the invalid SMILES.
112
+ try:
113
+ return Chem.MolToSmiles(
114
+ Chem.MolFromSmiles(smiles), isomericSmiles=True, canonical=True
115
+ )
116
+ except Exception as err:
117
+ # If rdkit failed, it means some molecule is invalid.
118
+ # Here we catch which ones are invalid so we can explain what's wrong
119
+ # in the error message.
120
+ invalid_smiles = []
121
+ for mol in smiles.split("."):
122
+ try:
123
+ Chem.MolToSmiles(
124
+ Chem.MolFromSmiles(mol), isomericSmiles=True, canonical=True
125
+ )
126
+ except: # noqa: E722
127
+ invalid_smiles.append(mol)
128
+ raise HTTPException(
129
+ status_code=status.HTTP_400_BAD_REQUEST,
130
+ detail=(
131
+ "The reaction could not be parsed by RDKit. The following"
132
+ f" SMILES were invalid: {', '.join(invalid_smiles)}"
133
+ ),
134
+ ) from err
135
+
136
+ def run(self, reaction: str) -> tuple[str, uuid.UUID]:
137
+ """Translates SMILES reaction strings using MolTrans model.
138
+
139
+ Args:
140
+ reaction: SMILES representation of a chemical reaction
141
+
142
+ Returns:
143
+ SMILES representation of the predicted product and a job ID
144
+ """
145
+ # Create a unique ID for the request
146
+ job_id = uuid.uuid4()
147
+
148
+ # Create temporary files for use in the MolTrans translator
149
+ with (
150
+ tempfile.NamedTemporaryFile(
151
+ mode="w+", delete=False, encoding="utf-8"
152
+ ) as precursor_file,
153
+ tempfile.NamedTemporaryFile(
154
+ mode="w+", delete=False, encoding="utf-8"
155
+ ) as output_file,
156
+ ):
157
+
158
+ # Write tokenized reaction to the precursor file
159
+ precursor_file.write(MolecularTransformer.smiles_tokenizer(reaction))
160
+ precursor_file.flush()
161
+
162
+ # OpenNMT expects to receive a list of arguments to translate
163
+ parser = ArgumentParser()
164
+ opts.config_opts(parser)
165
+ opts.translate_opts(parser)
166
+
167
+ args_dict = {
168
+ "model": self.model_path,
169
+ "src": precursor_file.name,
170
+ "output": output_file.name,
171
+ "batch_size": "64",
172
+ "beam_size": "50",
173
+ "max_length": "300",
174
+ }
175
+ args_list = [f"--{k}={v}" for k, v in args_dict.items()]
176
+ opt = parser.parse_args(args_list)
177
+
178
+ MolecularTransformer.translate(opt)
179
+
180
+ output_file.close()
181
+ prediction = Path(output_file.name).read_text(encoding="utf-8")
182
+
183
+ # Clean up temporary files
184
+ # we don't care if a failure leaves them dangling,
185
+ # since they are in a temp dir
186
+ os.unlink(precursor_file.name)
187
+ os.unlink(output_file.name)
188
+
189
+ return prediction.replace(" ", "").strip(), job_id
190
+
191
+
192
+ class MolBloom:
193
+ """Uses code from https://doi.org/10.1186/s13321-023-00765-1."""
194
+
195
+ def __init__(self) -> None:
196
+ # trigger eager loading of the bloom filter
197
+ buy("C1=CC=CC=C1", catalog="zinc20")
198
+ self.bloom = buy
199
+
200
+ def run(self, smiles: str) -> bool:
201
+ """Checks if a molecule is purchasable using MolBloom.
202
+
203
+ Args:
204
+ smiles: SMILES representation of a molecule
205
+
206
+ Returns:
207
+ True if the molecule is purchasable, False otherwise
208
+ """
209
+ return self.bloom(smiles, canonicalize=True, catalog="zinc20")
210
+
211
+
212
+ class Solubility:
213
+ """Uses code from https://doi.org/10.1039/D3DD00217A."""
214
+
215
+ def __init__(self) -> None:
216
+ self.sol = KDESol()
217
+
218
+ def run(self, smiles: str) -> npt.NDArray[np.float32] | Literal[False]:
219
+ """Computes solubility prediction for a molecule using KDESol.
220
+
221
+ Args:
222
+ smiles: SMILES representation of a molecule.
223
+
224
+ Returns:
225
+ Numpy array containing the mean predicted solubility,
226
+ aleatoric uncertainty (au), and epistemic uncertainty (eu).
227
+ """
228
+ m = Chem.MolFromSmiles(smiles)
229
+ if m is None:
230
+ return False # type: ignore[unreachable]
231
+ prediction = self.sol(Chem.MolToSmiles(m, canonical=True, isomericSmiles=False))
232
+ if prediction is None:
233
+ # Try without canonicalization.
234
+ # The model is an LSTM whose vocabulary is built from SELFIES-derived tokens.
235
+ # Depending on the SMILES notation, the model might not have the necessary tokens
236
+ # in its vocabulary to describe the molecule.
237
+ prediction = self.sol(smiles)
238
+ return prediction if prediction is not None else False
239
+
240
+
241
+ class MolTransRequest(BaseModel):
242
+ reaction: str
243
+
244
+
245
+ @app.post("/translate")
246
+ def translate_endpoint(request: MolTransRequest) -> dict[str, str | uuid.UUID]:
247
+ reaction = request.reaction.replace(" ", "")
248
+ if not reaction.count(">") == 2: # noqa: PLR2004
249
+ raise HTTPException(
250
+ status_code=status.HTTP_400_BAD_REQUEST,
251
+ detail=(
252
+ f"Syntax error in the reaction SMILES: {reaction}\n"
253
+ "The reaction should have two '>' characters, and no spaces."
254
+ ),
255
+ )
256
+ rxn = reaction.split(">")[:-1]
257
+ query_reaction = MolecularTransformer.canonicalize_smiles(
258
+ ".".join([r for r in rxn if r])
259
+ )
260
+
261
+ product, job_id = MolecularTransformer().run(query_reaction)
262
+ return {
263
+ "product": product,
264
+ "id": job_id,
265
+ "reaction": query_reaction + ">>" + product,
266
+ }
267
+
268
+
269
+ class MolBloomRequest(BaseModel):
270
+ smiles: list[str] | str
271
+
272
+
273
+ @app.post("/is_purchasable")
274
+ def is_purchasable_endpoint(request: MolBloomRequest) -> dict[str, bool]:
275
+ is_purchasable = MolBloom().run
276
+ smiles = request.smiles
277
+ if isinstance(smiles, str):
278
+ smiles = [smiles]
279
+ return {s: is_purchasable(s) for s in smiles}
280
+
281
+
282
+ class SmilesRequest(BaseModel):
283
+ smiles: str
284
+
285
+
286
+ @app.post("/compute_solubility")
287
+ def compute_solubility_endpoint(
288
+ request: SmilesRequest,
289
+ ) -> dict[str, float] | dict[str, str]:
290
+ if "." in request.smiles:
291
+ raise HTTPException(
292
+ status_code=status.HTTP_400_BAD_REQUEST,
293
+ detail="Only single molecules are supported",
294
+ )
295
+ prediction = Solubility().run(smiles=request.smiles)
296
+ if prediction is False:
297
+ return {"error": "Solubility prediction failed."}
298
+ mean, au, eu = prediction.tolist()
299
+ return {"mean": mean, "au": au, "eu": eu}
300
+
301
+
302
+ def main() -> None:
303
+ """Run uvicorn to serve the FastAPI app."""
304
+ try:
305
+ import uvicorn # noqa: PLC0415
306
+ except ImportError as exc:
307
+ raise ImportError(
308
+ "Serving requires the 'serve' extra for the `uvicorn` package. Please:"
309
+ " `pip install ether0.remotes[serve]`."
310
+ ) from exc
311
+
312
+ uvicorn.run("ether0.server:app")
313
+
314
+
315
+ if __name__ == "__main__":
316
+ main()
packages/remotes/tests/conftest.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib
2
+ import os
3
+ from collections.abc import Iterator
4
+ from unittest.mock import patch
5
+
6
+ import pytest
7
+ from fastapi.testclient import TestClient
8
+
9
+
10
+ @pytest.fixture(name="test_client", scope="session")
11
+ def fixture_test_client() -> Iterator[TestClient]:
12
+ # Lazily import ether0 modules so typeguard doesn't throw:
13
+ # > /path/to/.venv/lib/python3.11/site-packages/typeguard/_pytest_plugin.py:93:
14
+ # > InstrumentationWarning: typeguard cannot check these packages
15
+ # > because they are already imported: ether0
16
+ import ether0.clients # noqa: PLC0415
17
+
18
+ from ether0.server import app # noqa: PLC0415
19
+
20
+ client = TestClient(app)
21
+ with patch.dict(
22
+ os.environ,
23
+ {
24
+ "ETHER0_REMOTES_API_BASE_URL": str(client.base_url),
25
+ "ETHER0_REMOTES_API_TOKEN": "test_stub",
26
+ },
27
+ ):
28
+ importlib.reload(ether0.clients) # Pull in updated environment variables
29
+ yield client
packages/remotes/tests/test_clients.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import TYPE_CHECKING
3
+ from unittest.mock import patch
4
+
5
+ import httpx
6
+ import pytest
7
+ from ether0.clients import fetch_forward_rxn, fetch_purchasable, fetch_solubility
8
+
9
+ if TYPE_CHECKING:
10
+ from fastapi.testclient import TestClient
11
+
12
+ IN_GITHUB_ACTIONS: bool = os.getenv("GITHUB_ACTIONS") == "true"
13
+
14
+
15
+ @pytest.mark.parametrize(
16
+ ("smiles", "purchasable"),
17
+ [
18
+ ("CC(=O)OC1=CC=CC=C1C(=O)O", True),
19
+ ("CCO", True),
20
+ ("C1=CC=C(C=C1)C(=O)O", True),
21
+ ("OCN1C=CC=C1C(=O)O", False),
22
+ ],
23
+ )
24
+ def test_fetch_purchasable(
25
+ test_client: "TestClient", smiles: str, purchasable: bool
26
+ ) -> None:
27
+ with patch.object(httpx, "post", test_client.post):
28
+ assert fetch_purchasable(smiles)[smiles] == purchasable
29
+
30
+
31
+ @pytest.mark.parametrize(
32
+ ("smiles", "solubility"),
33
+ [
34
+ ("CC(=O)OC1=CC=CC=C1C(=O)O", -2.5),
35
+ ("O=C(NC1CCCC1)C(C1CC1)S1C(=N)C(C2=CC=NC3=CC=CC=C23)N=C1", -5.9),
36
+ ],
37
+ )
38
+ def test_fetch_solubility(
39
+ test_client: "TestClient", smiles: str, solubility: float
40
+ ) -> None:
41
+ with patch.object(httpx, "post", test_client.post):
42
+ result = fetch_solubility(smiles)
43
+ assert "solubility" in result
44
+ assert pytest.approx(result["solubility"], abs=0.1) == solubility
45
+
46
+
47
+ @pytest.mark.skipif(
48
+ IN_GITHUB_ACTIONS, reason="Requires loading MolTrans model, too large for CI"
49
+ )
50
+ @pytest.mark.parametrize(
51
+ ("precursor", "product", "correct"),
52
+ [
53
+ pytest.param("CC=O.O=C1CCC1Cl>[Mg].CCOCC>", "CC(O)C1(O)CCC1Cl", True),
54
+ pytest.param(
55
+ "CCC=O.CC1(C)CC(N)C(=O)N1>[B-](OC(=O)C)(OC(=O)C)OC(=O)C.[Na+].C=O>",
56
+ "CCCN(C)C1CC(C)(C)NC1=O",
57
+ True,
58
+ ),
59
+ pytest.param("CCCC=O.O=C1CC=C(Br)S1>[Mg].CCOCC>", "CCCC(O)C1=CCC(=O)S1", True),
60
+ pytest.param("CCCC=O.COC(=O)C1CC1Br>[Mg].CCOCC>", "CCCC(O)C1CC1C(=O)OC", True),
61
+ pytest.param(
62
+ "CCCC=O.NC1CCCNC1=O>[B-](OC(=O)C)(OC(=O)C)OC(=O)C.[Na+].C=O>",
63
+ "O=C1NCCCC1N1CNCCCC1=O",
64
+ True,
65
+ ),
66
+ pytest.param("CC=O.O=C1CCC1Cl.[Mg].CCOCC", None, False, id="missing_arrow"),
67
+ pytest.param(
68
+ "CC=O.O=C1CCC1Cl > [Mg].CCOCC", None, False, id="space_in_reaction"
69
+ ),
70
+ pytest.param("not a > reaction", None, False, id="invalid_reaction"),
71
+ pytest.param(
72
+ "CCCC=O.COC(=O)C1CC1Br>[Mg].CCOCC", None, False, id="trailing_arrow"
73
+ ),
74
+ ],
75
+ )
76
+ def test_fetch_forward_rxn(
77
+ test_client: "TestClient", precursor: str, product: str | None, correct: bool
78
+ ) -> None:
79
+ with patch.object(httpx, "post", test_client.post):
80
+ result = fetch_forward_rxn(precursor)
81
+ if correct:
82
+ assert (
83
+ result.get("product") == product
84
+ ), f"Failed to get expected {product=} in {result=}."
85
+ assert "error" not in result
86
+ else:
87
+ assert result.get(
88
+ "error"
89
+ ), f"Expected an error given {precursor=} and {correct=}"
90
+ assert "syntax error" in result["error"].lower()
91
+ assert "product" not in result
packages/remotes/tests/test_rewards.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import TYPE_CHECKING
2
+ from unittest.mock import patch
3
+
4
+ import httpx
5
+ import pytest
6
+ from ether0.rewards import oracle_solubility_eval
7
+ from pydantic import JsonValue
8
+
9
+ if TYPE_CHECKING:
10
+ from fastapi.testclient import TestClient
11
+
12
+
13
+ @pytest.mark.parametrize(
14
+ ("yhat", "y", "expected"),
15
+ [
16
+ pytest.param(
17
+ "c1c(O)nc2ccc(CN)cc2c1OC1CCCC1",
18
+ '("scaffold", "c1ccc2c(OC3CCCC3)ccnc2c1", -3.844724178314209, "increase")',
19
+ 1.0,
20
+ id="match-scaffold",
21
+ ),
22
+ pytest.param(
23
+ "Oc1c(O)nc2ccc(C[NH3])cc2c1OC1CCCC1O",
24
+ '("scaffold", "c1ccc2c(OC3CCCC3)ccnc2c1", -3.844724178314209, "decrease")',
25
+ 0.0,
26
+ id="match-scaffold-bad-solubility",
27
+ ),
28
+ pytest.param(
29
+ "CCCCCC=CCCCN(C)CCC",
30
+ '("groups", ["cis double bond", "hetero N basic H"], -4.693881511688232, "decrease")', # noqa: E501
31
+ 1.0,
32
+ id="match-groups",
33
+ ),
34
+ pytest.param(
35
+ "CCCCCCCCCCN(C)N[NH]CNCC",
36
+ '("groups", ["cis double bond", "hetero N basic H"], -1.9085578918457031, "decrease")', # noqa: E501
37
+ 0.0,
38
+ id="match-groups-bad-groups",
39
+ ),
40
+ pytest.param(
41
+ "CCCCN(CCCC)C(=O)C1c2ccccc2Oc2ccccc21",
42
+ '("tanimoto", "CCCN(CCC)C(=O)C1c2ccccc2Oc2ccccc21", -5.273194313049316, "decrease")',
43
+ 1.0,
44
+ id="match-tanimoto",
45
+ ),
46
+ pytest.param(
47
+ "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCN(CCCC)C(=O)C1c2ccccc2Oc2ccccc21",
48
+ '("tanimoto", "CCCN(CCC)C(=O)C1c2ccccc2Oc2ccccc21", -5.273194313049316, "decrease")',
49
+ 0.0,
50
+ id="match-tanimoto-too-far",
51
+ ),
52
+ pytest.param(
53
+ "CCCCCCCCCCCCCCCCCCCCCCN(CCC)C(=O)C1c2ccccc2Oc2ccccc21",
54
+ '("tanimoto", "CCCN(CCC)C(=O)C1c2ccccc2Oc2ccccc21", -7.45, "decrease")',
55
+ 0.0,
56
+ id="match-tanimoto-hacked-dist",
57
+ ),
58
+ pytest.param(
59
+ "CN(C)C(=O)C1c2ccccc2Oc2ccccc21",
60
+ '("tanimoto", "CCCN(CCC)C(=O)C1c2ccccc2Oc2ccccc21", -4.273194313049316, "decrease")',
61
+ 0.0,
62
+ id="match-tanimoto-bad-solubility",
63
+ ),
64
+ pytest.param(
65
+ "CN1CCN(CCCCNc2ncc3cc(-c4c(Cl)cccc4Cl)c(=O)n(C)c3n2)CC1.CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC",
66
+ '("tanimoto", "CN1CCN(CCCCNc2ncc3cc(-c4c(Cl)cccc4Cl)c(=O)n(C)c3n2)CC1", -4.273194313049316, "decrease")', # noqa: E501
67
+ 0.0,
68
+ id="match-tanimoto-bad-structure",
69
+ ),
70
+ pytest.param(
71
+ "C[C@@H]1CC[C@@]2(CC[C@@]3(C(=CC[C@H]4[C@]3(CC[C@@H]5[C@@]4(C[C@H]([C@@H]([C@@]5(C)CO)O)O)C)C)[C@@H]2[C@H]1C)C)C(=O)O[C@H]6[C@@H]([C@H]([C@@H]([C@H](O6)CO[C@H]7[C@@H]([C@H]([C@@H]([C@H](O7)CO)O[C@H]8[C@@H]([C@@H]([C@H]([C@@H](O8)C)O)O)O)O)O)O)O)O",
72
+ '("groups", ["secondary alcohol", "primary alcohol", "hydroxylated heteroatom substituted glycosidic ring"], -5.921097755432129, "increase")', # noqa: E501
73
+ 1.0,
74
+ id="problematic-groups",
75
+ ),
76
+ pytest.param(
77
+ "COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1-c1ccc(C#CCCCC(=O)NO)o1",
78
+ '("tanimoto", "CCCC", -6.25, "increase")',
79
+ 0.0,
80
+ id="identical-increase",
81
+ ),
82
+ pytest.param(
83
+ "COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1-c1ccc(C#CCCCC(=O)NO)o1",
84
+ '("tanimoto", "CCCC", -7.25, "decrease")',
85
+ 0.0,
86
+ id="identical-decrease",
87
+ ),
88
+ pytest.param(
89
+ "OOCCCN(CCC)C(=O)C1c2ccccc2Oc2ccccc21",
90
+ '("tanimoto", "OCCCN(CCC)C(=O)C1c2ccccc2Oc2ccccc21", -5.273194313049316, "decrease")', # noqa: E501
91
+ 0.0,
92
+ id="unreasonable-molecule-failure",
93
+ ),
94
+ pytest.param(
95
+ "CC(C)(C)Cc1nc(Br)c(S(C)(=O)=O)n1Cc1ccc(-c2ccccc2-c2nn[nH]n2)cc1",
96
+ "('scaffold', 'c1ccc(-c2nn[nH]n2)c(-c2ccc(Cn3ccnc3)cc2)c1', '-7.790801048278809', 'decrease')", # noqa: E501
97
+ 0.0,
98
+ id="eval-has-str-value",
99
+ ),
100
+ ],
101
+ )
102
+ def test_oracle_solubility_eval(
103
+ test_client: "TestClient", yhat: str, y: str, expected: float
104
+ ) -> None:
105
+ expl: dict[str, JsonValue] = {}
106
+ with patch.object(httpx, "post", test_client.post):
107
+ result = oracle_solubility_eval(yhat, y, metadata=expl)
108
+ assert result == expected, f"Expected {expected}, got {result}. Explanation: {expl}"
pyproject.toml ADDED
@@ -0,0 +1,546 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ build-backend = "setuptools.build_meta"
3
+ # SEE: https://github.com/pypa/setuptools_scm#pyprojecttoml-usage
4
+ requires = ["setuptools>=64", "setuptools_scm>=8"]
5
+
6
+ [dependency-groups]
7
+ dev = ["ether0.remotes[dev]", "ether0[dev]"]
8
+
9
+ [project]
10
+ authors = [
11
+ {email = "[email protected]", name = "FutureHouse technical staff"},
12
+ ]
13
+ # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
14
+ classifiers = [
15
+ "Intended Audience :: Developers",
16
+ "License :: OSI Approved :: Apache Software License",
17
+ "Operating System :: OS Independent",
18
+ "Programming Language :: Python :: 3 :: Only",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Programming Language :: Python :: 3.13",
22
+ "Programming Language :: Python",
23
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
24
+ "Topic :: Scientific/Engineering :: Chemistry",
25
+ ]
26
+ dependencies = [
27
+ "accelerate>=1.10.1",
28
+ "datasets",
29
+ "exmol>=3.3.0", # to get fixed functional group names and py.typed
30
+ "gradio>=5.44.0",
31
+ "httpx",
32
+ "huggingface-hub",
33
+ "molbloom==2.3.4", # exact pin to be compatible with rings.bloom
34
+ "pydantic>=2", # Pin to keep recent
35
+ "rdkit",
36
+ "regex", # For unicode property \p
37
+ "spaces>=0.40.1",
38
+ "tenacity",
39
+ ]
40
+ description = "Data models, rewards, and utility functions for ether0."
41
+ dynamic = [
42
+ "version", # Required for setuptools_scm
43
+ ]
44
+ license = {file = "LICENSE"}
45
+ name = "ether0"
46
+ readme = "README.md"
47
+ requires-python = ">=3.11"
48
+
49
+ [project.optional-dependencies]
50
+ add-tokens = [
51
+ "ipykernel", # For Jupyter notebook support
52
+ "ipywidgets>=8", # For Jupyter notebook support, and pin to keep recent
53
+ "transformers>=4.49", # Pin to keep recent
54
+ ]
55
+ baselines = [
56
+ "fhaviary>=0.19", # Pin for Python 3.13 compatibility
57
+ "fhlmi>=0.26", # Pin for Python 3.13 compatibility
58
+ "ipython",
59
+ ]
60
+ dev = [
61
+ "ether0[add-tokens,typing]",
62
+ "huggingface-hub[cli]", # For login inside of CI
63
+ "ipython>=8", # Pin to keep recent
64
+ "mypy>=1.8", # For addition of mutable-override
65
+ "pre-commit>=3.4", # Pin to keep recent
66
+ "pylint>=3", # Pin to keep recent
67
+ "pytest",
68
+ "pytest-subtests",
69
+ "pytest-sugar",
70
+ "pytest-timer[colorama]",
71
+ "pytest-xdist",
72
+ "refurb>=2", # Pin to keep recent
73
+ "typeguard",
74
+ ]
75
+ typing = [
76
+ "types-regex",
77
+ ]
78
+
79
+ [project.urls]
80
+ issues = "https://github.com/Future-House/ether0/issues"
81
+ repository = "https://github.com/Future-House/ether0"
82
+
83
+ [tool.black]
84
+ enable-unstable-feature = [
85
+ "hug_parens_with_braces_and_square_brackets", # TODO: remove after https://github.com/psf/black/issues/4036 resolution
86
+ ]
87
+ preview = true
88
+
89
+ [tool.codespell]
90
+ check-filenames = true
91
+ check-hidden = true
92
+ ignore-words-list = "amination,astroid,ser"
93
+
94
+ [tool.coverage]
95
+
96
+ [tool.coverage.report]
97
+ exclude_also = [
98
+ "@overload", # SEE: https://github.com/nedbat/coveragepy/issues/970
99
+ "if TYPE_CHECKING:",
100
+ ]
101
+ # Number of digits after the decimal point to display for reported coverage percentages
102
+ precision = 2
103
+
104
+ [tool.coverage.run]
105
+ # Measure branch coverage
106
+ branch = true
107
+ # This will be used if you run `coverage run` with no further arguments
108
+ # This is designed to be invoked from within the test directory
109
+ command_line = "-m pytest"
110
+
111
+ [tool.markdownlint]
112
+ no-inline-html = false
113
+
114
+ [tool.markdownlint.line-length]
115
+ code_block_line_length = 88 # Match ruff line-length
116
+ line_length = 120 # Match ruff max-doc-length
117
+ stern = true
118
+ tables = false
119
+
120
+ [tool.mypy]
121
+ # Type-checks the interior of functions without type annotations.
122
+ check_untyped_defs = true
123
+ # Allows enabling one or multiple error codes globally. Note: This option will
124
+ # override disabled error codes from the disable_error_code option.
125
+ enable_error_code = [
126
+ "ignore-without-code",
127
+ "mutable-override",
128
+ "redundant-cast",
129
+ "redundant-expr",
130
+ "redundant-self",
131
+ "truthy-bool",
132
+ "truthy-iterable",
133
+ "unimported-reveal",
134
+ "unreachable",
135
+ "unused-awaitable",
136
+ "unused-ignore",
137
+ ]
138
+ # Shows a short summary line after error messages.
139
+ error_summary = false
140
+ # A regular expression that matches file names, directory names and paths which mypy
141
+ # should ignore while recursively discovering files to check. Use forward slashes (/) as
142
+ # directory separators on all platforms.
143
+ exclude = [
144
+ "^\\.?venv", # SEE: https://regex101.com/r/0rp5Br/1
145
+ ]
146
+ # This flag tells mypy that top-level packages will be based in either the current
147
+ # directory, or a member of the MYPYPATH environment variable or mypy_path config
148
+ # option. This option is only useful in the absence of __init__.py. See Mapping file
149
+ # paths to modules for details.
150
+ explicit_package_bases = true
151
+ # Specifies the paths to use, after trying the paths from MYPYPATH environment variable.
152
+ # Useful if you'd like to keep stubs in your repo, along with the config file.
153
+ # Multiple paths are always separated with a : or , regardless of the platform.
154
+ # User home directory and environment variables will be expanded.
155
+ mypy_path = "$MYPY_CONFIG_FILE_DIR/src,$MYPY_CONFIG_FILE_DIR/packages/remotes/src"
156
+ # Comma-separated list of mypy plugins.
157
+ plugins = ["pydantic.mypy"]
158
+ # Use visually nicer output in error messages: use soft word wrap, show source
159
+ # code snippets, and show error location markers.
160
+ pretty = true
161
+ # Shows column numbers in error messages.
162
+ show_column_numbers = true
163
+ # Shows error codes in error messages.
164
+ # SEE: https://mypy.readthedocs.io/en/stable/error_codes.html#error-codes
165
+ show_error_codes = true
166
+ # Prefixes each error with the relevant context.
167
+ show_error_context = true
168
+ # Warns about casting an expression to its inferred type.
169
+ warn_redundant_casts = true
170
+ # Shows a warning when encountering any code inferred to be unreachable or
171
+ # redundant after performing type analysis.
172
+ warn_unreachable = true
173
+ # Warns about per-module sections in the config file that do not match any
174
+ # files processed when invoking mypy.
175
+ warn_unused_configs = true
176
+ # Warns about unneeded `# type: ignore` comments.
177
+ warn_unused_ignores = true
178
+
179
+ [[tool.mypy.overrides]]
180
+ # Suppresses error messages about imports that cannot be resolved.
181
+ ignore_missing_imports = true
182
+ # Per-module configuration options
183
+ module = [
184
+ "datasets.*", # SEE: https://github.com/huggingface/datasets/issues/3841
185
+ "huggingface_hub.*", # SEE: https://github.com/huggingface/huggingface_hub/issues/1662
186
+ "molbloom", # SEE: https://github.com/whitead/molbloom/issues/29
187
+ "molsol", # SEE: https://github.com/maykcaldas/molsol/issues/6
188
+ "onmt.*",
189
+ "setuptools_scm", # SEE: https://github.com/pypa/setuptools_scm/issues/501
190
+ "transformers.*", # SEE: https://github.com/huggingface/transformers/pull/18485
191
+ ]
192
+
193
+ [tool.pylint]
194
+
195
+ [tool.pylint.design]
196
+ # Maximum number of attributes for a class (see R0902).
197
+ max-attributes = 12
198
+
199
+ [tool.pylint.format]
200
+ # Maximum number of characters on a single line.
201
+ max-line-length = 97 # Match ruff line-length
202
+
203
+ [tool.pylint.main]
204
+ # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
205
+ # number of processors available to use, and will cap the count on Windows to
206
+ # avoid hangs.
207
+ jobs = 0
208
+
209
+ [tool.pylint.messages_control]
210
+ # Disable the message, report, category or checker with the given id(s).
211
+ disable = [
212
+ "bare-except", # Rely on ruff E722 for this
213
+ "broad-exception-caught", # Rely on ruff BLE001 for this
214
+ "broad-exception-raised", # Rely on ruff TRY002 for this
215
+ "dangerous-default-value", # Rely on ruff W0102 for this
216
+ "empty-docstring", # Let pep257 take care of docstrings
217
+ "expression-not-assigned", # Rely on mypy func-returns-value for this
218
+ "fixme", # codetags are useful
219
+ "function-redefined", # Rely on mypy no-redef for this
220
+ "global-statement", # Rely on ruff PLW0603 for this
221
+ "global-variable-not-assigned", # Rely on ruff PLW0602 for this
222
+ "import-outside-toplevel", # Rely on ruff PLC0415 for this
223
+ "keyword-arg-before-vararg", # Rely on ruff B026 for this
224
+ "line-too-long", # Rely on ruff E501 for this
225
+ "logging-fstring-interpolation", # f-strings are convenient
226
+ "logging-too-many-args", # Rely on ruff PLE1205 for this
227
+ "missing-docstring", # Let docformatter and ruff take care of docstrings
228
+ "missing-final-newline", # Rely on ruff W292 for this
229
+ "no-else-return", # Rely on ruff RET506 for this
230
+ "no-member", # Buggy, SEE: https://github.com/pylint-dev/pylint/issues/8138
231
+ "no-value-for-parameter", # Rely on mypy call-arg for this
232
+ "protected-access", # Don't care to enforce this in testing
233
+ "raise-missing-from", # Rely on ruff B904 for this
234
+ "redefined-builtin", # Rely on ruff A002 for this
235
+ "too-few-public-methods", # Don't care to enforce this
236
+ "too-many-arguments", # Don't care to enforce this
237
+ "too-many-boolean-expressions", # Rely on ruff PLR0916 for this
238
+ "too-many-branches", # Rely on ruff PLR0912 for this
239
+ "too-many-locals", # Rely on ruff PLR0914 for this
240
+ "too-many-positional-arguments", # Rely on ruff PLR0917 for this
241
+ "too-many-public-methods", # Rely on ruff PLR0904 for this
242
+ "too-many-return-statements", # Rely on ruff PLR0911 for this
243
+ "too-many-statements", # Rely on ruff PLR0915 for this
244
+ "ungrouped-imports", # Rely on ruff I001 for this
245
+ "unidiomatic-typecheck", # Rely on ruff E721 for this
246
+ "unnecessary-dict-index-lookup", # Rely on ruff PLR1733 for this
247
+ "unreachable", # Rely on mypy unreachable for this
248
+ "unspecified-encoding", # Rely on ruff PLW1514 for this
249
+ "unsubscriptable-object", # Buggy, SEE: https://github.com/pylint-dev/pylint/issues/3637
250
+ "unsupported-membership-test", # Buggy, SEE: https://github.com/pylint-dev/pylint/issues/3045
251
+ "unused-argument", # Rely on ruff ARG002 for this
252
+ "unused-import", # Rely on ruff F401 for this
253
+ "unused-variable", # Rely on ruff F841 for this
254
+ "unused-wildcard-import", # Wildcard imports are convenient
255
+ "use-sequence-for-iteration", # Rely on ruff C0208 for this
256
+ "wildcard-import", # Wildcard imports are convenient
257
+ "wrong-import-order", # Rely on ruff I001 for this
258
+ "wrong-import-position", # Rely on ruff E402 for this
259
+ ]
260
+ # Enable the message, report, category or checker with the given id(s).
261
+ enable = [
262
+ "useless-suppression", # Print unused `pylint: disable` comments
263
+ ]
264
+
265
+ [tool.pylint.reports]
266
+ # Set true to activate the evaluation score.
267
+ score = false
268
+
269
+ [tool.pylint.similarities]
270
+ # Minimum lines number of a similarity.
271
+ min-similarity-lines = 10
272
+
273
+ [tool.pytest.ini_options]
274
+ # Add the specified `OPTS` to the set of command line arguments as if they had
275
+ # been specified by the user.
276
+ addopts = "--typeguard-packages=ether0 --doctest-modules"
277
+ # List of directories that should be searched for tests when no specific directories,
278
+ # files or test ids are given in the command line when executing pytest from the rootdir
279
+ # directory. File system paths may use shell-style wildcards, including the recursive **
280
+ # pattern.
281
+ testpaths = ["packages/remotes", "src", "tests"]
282
+
283
+ [tool.refurb]
284
+ enable_all = true
285
+ ignore = [
286
+ "FURB101", # Rely on ruff FURB101 for this
287
+ "FURB103", # Rely on ruff FURB103 for this
288
+ "FURB141", # Rely on ruff PTH110 for this
289
+ "FURB144", # Rely on ruff PTH107 for this
290
+ "FURB146", # Rely on ruff PTH113 for this
291
+ "FURB147", # Rely on ruff PTH118 for this
292
+ "FURB150", # Rely on ruff PTH102 for this
293
+ "FURB155", # Rely on ruff PTH202 for this
294
+ ]
295
+
296
+ [tool.ruff]
297
+ # Line length to use when enforcing long-lines violations (like `E501`).
298
+ line-length = 97 # ceil(1.1 * 88) makes `E501` equivalent to `B950`
299
+ # Enable application of unsafe fixes.
300
+ unsafe-fixes = true
301
+
302
+ [tool.ruff.format]
303
+ # Enable reformatting of code snippets in docstrings.
304
+ docstring-code-format = true
305
+ # Enable preview style formatting.
306
+ preview = true
307
+
308
+ [tool.ruff.lint]
309
+ explicit-preview-rules = true
310
+ extend-select = [
311
+ "AIR002",
312
+ "AIR301",
313
+ "AIR302",
314
+ "AIR311",
315
+ "AIR312",
316
+ "ASYNC116",
317
+ "B901",
318
+ "B903",
319
+ "B909",
320
+ "CPY001",
321
+ "DOC201",
322
+ "DOC202",
323
+ "DOC402",
324
+ "DOC403",
325
+ "DOC501",
326
+ "DOC502",
327
+ "E111",
328
+ "E112",
329
+ "E113",
330
+ "E114",
331
+ "E115",
332
+ "E116",
333
+ "E117",
334
+ "E201",
335
+ "E202",
336
+ "E203",
337
+ "E204",
338
+ "E211",
339
+ "E221",
340
+ "E222",
341
+ "E223",
342
+ "E224",
343
+ "E225",
344
+ "E226",
345
+ "E227",
346
+ "E228",
347
+ "E231",
348
+ "E241",
349
+ "E242",
350
+ "E251",
351
+ "E252",
352
+ "E261",
353
+ "E262",
354
+ "E265",
355
+ "E266",
356
+ "E271",
357
+ "E272",
358
+ "E273",
359
+ "E274",
360
+ "E275",
361
+ "E301",
362
+ "E302",
363
+ "E303",
364
+ "E304",
365
+ "E305",
366
+ "E306",
367
+ "E502",
368
+ "FURB101",
369
+ "FURB103",
370
+ "FURB110",
371
+ "FURB113",
372
+ "FURB116",
373
+ "FURB118",
374
+ "FURB122",
375
+ "FURB131",
376
+ "FURB132",
377
+ "FURB140",
378
+ "FURB142",
379
+ "FURB145",
380
+ "FURB148",
381
+ "FURB152",
382
+ "FURB154",
383
+ "FURB156",
384
+ "FURB157",
385
+ "FURB162",
386
+ "FURB164",
387
+ "FURB166",
388
+ "FURB171",
389
+ "FURB180",
390
+ "FURB189",
391
+ "FURB192",
392
+ "LOG004",
393
+ "LOG014",
394
+ "PLC0415",
395
+ "PLC1901",
396
+ "PLC2701",
397
+ "PLC2801",
398
+ "PLE0304",
399
+ "PLE1141",
400
+ "PLE4703",
401
+ "PLR0202",
402
+ "PLR0203",
403
+ "PLR0904",
404
+ "PLR0914",
405
+ "PLR0916",
406
+ "PLR0917",
407
+ "PLR1702",
408
+ "PLR1733",
409
+ "PLR6104",
410
+ "PLR6201",
411
+ "PLR6301",
412
+ "PLW0108",
413
+ "PLW0177",
414
+ "PLW0244",
415
+ "PLW1514",
416
+ "PLW1641",
417
+ "PLW3201",
418
+ "PT028",
419
+ "PT029",
420
+ "PT030",
421
+ "PT031",
422
+ "PYI059",
423
+ "PYI061",
424
+ "RUF027",
425
+ "RUF028",
426
+ "RUF029",
427
+ "RUF031",
428
+ "RUF036",
429
+ "RUF037",
430
+ "RUF038",
431
+ "RUF039",
432
+ "RUF043",
433
+ "RUF045",
434
+ "RUF047",
435
+ "RUF049",
436
+ "RUF052",
437
+ "RUF053",
438
+ "RUF054",
439
+ "RUF055",
440
+ "RUF056",
441
+ "RUF057",
442
+ "RUF058",
443
+ "RUF059",
444
+ "RUF102",
445
+ "TC008",
446
+ "UP042",
447
+ "UP045",
448
+ "UP046",
449
+ "UP047",
450
+ "UP049",
451
+ "W391",
452
+ ]
453
+ external = [
454
+ "FURB", # refurb
455
+ ]
456
+ ignore = [
457
+ "ANN", # Don't care to enforce typing
458
+ "BLE001", # Don't care to enforce blind exception catching
459
+ "C901", # Don't care to limit complexity
460
+ "COM812", # Trailing comma with black leads to wasting lines
461
+ "CPY001", # Don't care to require copyright notices in every file
462
+ "D100", # D100, D101, D102, D103, D104, D105, D106, D107: don't always need docstrings
463
+ "D101",
464
+ "D102",
465
+ "D103",
466
+ "D104",
467
+ "D105",
468
+ "D106",
469
+ "D107",
470
+ "D203", # Keep docstring next to the class definition (covered by D211)
471
+ "D212", # Summary should be on second line (opposite of D213)
472
+ "D402", # It's nice to reuse the method name
473
+ "D406", # Google style requires ":" at end
474
+ "D407", # We aren't using numpy style
475
+ "D413", # Blank line after last section. -> No blank line
476
+ "DOC201", # Don't care to require Returns in docstrings
477
+ "DOC402", # Don't care to require Yields in docstrings
478
+ "DOC501", # Don't care to require Raises in docstrings
479
+ "DTZ", # Don't care to have timezone safety
480
+ "EM", # Overly pedantic
481
+ "FBT001", # FBT001, FBT002: overly pedantic
482
+ "FBT002",
483
+ "FIX", # Don't care to prevent TODO, FIXME, etc.
484
+ "G004", # f-strings are convenient
485
+ "INP001", # Can use namespace packages
486
+ "ISC001", # For ruff format compatibility
487
+ "PLR0911", # Don't care to limit complexity
488
+ "PLR0912", # Don't care to limit complexity
489
+ "PLR0913", # Don't care to limit complexity
490
+ "PLR0917", # Don't care to limit complexity
491
+ "PTH", # Overly pedantic
492
+ "SLF001", # Overly pedantic
493
+ "T201", # Overly pedantic
494
+ "TC001", # TC001, TC002, TC003: don't care to enforce type checking blocks
495
+ "TC002",
496
+ "TC003",
497
+ "TC006", # Strings in cast don't work with PyCharm CE 2024.3.4's jump-to-definition
498
+ "TD002", # Don't care for TODO author
499
+ "TD003", # Don't care for TODO links
500
+ "TRY003", # Overly pedantic
501
+ ]
502
+ preview = true
503
+ select = ["ALL"]
504
+ unfixable = [
505
+ "B007", # While debugging, unused loop variables can be useful
506
+ "B905", # Default fix is zip(strict=False), but that can hide bugs
507
+ "ERA001", # While debugging, temporarily commenting code can be useful
508
+ "F401", # While debugging, unused imports can be useful
509
+ "F841", # While debugging, unused locals can be useful
510
+ "TC004", # While debugging, it can be nice to keep TYPE_CHECKING in-tact
511
+ ]
512
+
513
+ [tool.ruff.lint.flake8-annotations]
514
+ mypy-init-return = true
515
+
516
+ [tool.ruff.lint.per-file-ignores]
517
+ "**/tests/*.py" = [
518
+ "N802", # Tests function names can match class names
519
+ "PLR2004", # Tests can have magic values
520
+ "PLR6301", # Test classes can ignore self
521
+ "S101", # Tests can have assertions
522
+ ]
523
+
524
+ [tool.ruff.lint.pycodestyle]
525
+ # The maximum line length to allow for line-length violations within
526
+ # documentation (W505), including standalone comments.
527
+ max-doc-length = 97 # Match line-length
528
+
529
+ [tool.ruff.lint.pydocstyle]
530
+ # Whether to use Google-style or NumPy-style conventions or the PEP257
531
+ # defaults when analyzing docstring sections.
532
+ convention = "google"
533
+
534
+ [tool.tomlsort]
535
+ all = true
536
+ in_place = true
537
+ spaces_before_inline_comment = 2 # Match Python PEP 8
538
+ spaces_indent_inline_array = 4 # Match Python PEP 8
539
+ trailing_comma_inline_array = true
540
+
541
+ [tool.uv.sources]
542
+ ether0 = {workspace = true}
543
+ "ether0.remotes" = {workspace = true}
544
+
545
+ [tool.uv.workspace]
546
+ members = ["packages/*"]
src/.DS_Store ADDED
Binary file (6.15 kB). View file
 
src/ether0.egg-info/PKG-INFO ADDED
@@ -0,0 +1,467 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: ether0
3
+ Version: 0.0.0
4
+ Summary: Data models, rewards, and utility functions for ether0.
5
+ Author-email: FutureHouse technical staff <[email protected]>
6
+ License: Apache License
7
+ Version 2.0, January 2004
8
+ http://www.apache.org/licenses/
9
+
10
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
11
+
12
+ 1. Definitions.
13
+
14
+ "License" shall mean the terms and conditions for use, reproduction,
15
+ and distribution as defined by Sections 1 through 9 of this document.
16
+
17
+ "Licensor" shall mean the copyright owner or entity authorized by
18
+ the copyright owner that is granting the License.
19
+
20
+ "Legal Entity" shall mean the union of the acting entity and all
21
+ other entities that control, are controlled by, or are under common
22
+ control with that entity. For the purposes of this definition,
23
+ "control" means (i) the power, direct or indirect, to cause the
24
+ direction or management of such entity, whether by contract or
25
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
26
+ outstanding shares, or (iii) beneficial ownership of such entity.
27
+
28
+ "You" (or "Your") shall mean an individual or Legal Entity
29
+ exercising permissions granted by this License.
30
+
31
+ "Source" form shall mean the preferred form for making modifications,
32
+ including but not limited to software source code, documentation
33
+ source, and configuration files.
34
+
35
+ "Object" form shall mean any form resulting from mechanical
36
+ transformation or translation of a Source form, including but
37
+ not limited to compiled object code, generated documentation,
38
+ and conversions to other media types.
39
+
40
+ "Work" shall mean the work of authorship, whether in Source or
41
+ Object form, made available under the License, as indicated by a
42
+ copyright notice that is included in or attached to the work
43
+ (an example is provided in the Appendix below).
44
+
45
+ "Derivative Works" shall mean any work, whether in Source or Object
46
+ form, that is based on (or derived from) the Work and for which the
47
+ editorial revisions, annotations, elaborations, or other modifications
48
+ represent, as a whole, an original work of authorship. For the purposes
49
+ of this License, Derivative Works shall not include works that remain
50
+ separable from, or merely link (or bind by name) to the interfaces of,
51
+ the Work and Derivative Works thereof.
52
+
53
+ "Contribution" shall mean any work of authorship, including
54
+ the original version of the Work and any modifications or additions
55
+ to that Work or Derivative Works thereof, that is intentionally
56
+ submitted to Licensor for inclusion in the Work by the copyright owner
57
+ or by an individual or Legal Entity authorized to submit on behalf of
58
+ the copyright owner. For the purposes of this definition, "submitted"
59
+ means any form of electronic, verbal, or written communication sent
60
+ to the Licensor or its representatives, including but not limited to
61
+ communication on electronic mailing lists, source code control systems,
62
+ and issue tracking systems that are managed by, or on behalf of, the
63
+ Licensor for the purpose of discussing and improving the Work, but
64
+ excluding communication that is conspicuously marked or otherwise
65
+ designated in writing by the copyright owner as "Not a Contribution."
66
+
67
+ "Contributor" shall mean Licensor and any individual or Legal Entity
68
+ on behalf of whom a Contribution has been received by Licensor and
69
+ subsequently incorporated within the Work.
70
+
71
+ 2. Grant of Copyright License. Subject to the terms and conditions of
72
+ this License, each Contributor hereby grants to You a perpetual,
73
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
74
+ copyright license to reproduce, prepare Derivative Works of,
75
+ publicly display, publicly perform, sublicense, and distribute the
76
+ Work and such Derivative Works in Source or Object form.
77
+
78
+ 3. Grant of Patent License. Subject to the terms and conditions of
79
+ this License, each Contributor hereby grants to You a perpetual,
80
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
81
+ (except as stated in this section) patent license to make, have made,
82
+ use, offer to sell, sell, import, and otherwise transfer the Work,
83
+ where such license applies only to those patent claims licensable
84
+ by such Contributor that are necessarily infringed by their
85
+ Contribution(s) alone or by combination of their Contribution(s)
86
+ with the Work to which such Contribution(s) was submitted. If You
87
+ institute patent litigation against any entity (including a
88
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
89
+ or a Contribution incorporated within the Work constitutes direct
90
+ or contributory patent infringement, then any patent licenses
91
+ granted to You under this License for that Work shall terminate
92
+ as of the date such litigation is filed.
93
+
94
+ 4. Redistribution. You may reproduce and distribute copies of the
95
+ Work or Derivative Works thereof in any medium, with or without
96
+ modifications, and in Source or Object form, provided that You
97
+ meet the following conditions:
98
+
99
+ (a) You must give any other recipients of the Work or
100
+ Derivative Works a copy of this License; and
101
+
102
+ (b) You must cause any modified files to carry prominent notices
103
+ stating that You changed the files; and
104
+
105
+ (c) You must retain, in the Source form of any Derivative Works
106
+ that You distribute, all copyright, patent, trademark, and
107
+ attribution notices from the Source form of the Work,
108
+ excluding those notices that do not pertain to any part of
109
+ the Derivative Works; and
110
+
111
+ (d) If the Work includes a "NOTICE" text file as part of its
112
+ distribution, then any Derivative Works that You distribute must
113
+ include a readable copy of the attribution notices contained
114
+ within such NOTICE file, excluding those notices that do not
115
+ pertain to any part of the Derivative Works, in at least one
116
+ of the following places: within a NOTICE text file distributed
117
+ as part of the Derivative Works; within the Source form or
118
+ documentation, if provided along with the Derivative Works; or,
119
+ within a display generated by the Derivative Works, if and
120
+ wherever such third-party notices normally appear. The contents
121
+ of the NOTICE file are for informational purposes only and
122
+ do not modify the License. You may add Your own attribution
123
+ notices within Derivative Works that You distribute, alongside
124
+ or as an addendum to the NOTICE text from the Work, provided
125
+ that such additional attribution notices cannot be construed
126
+ as modifying the License.
127
+
128
+ You may add Your own copyright statement to Your modifications and
129
+ may provide additional or different license terms and conditions
130
+ for use, reproduction, or distribution of Your modifications, or
131
+ for any such Derivative Works as a whole, provided Your use,
132
+ reproduction, and distribution of the Work otherwise complies with
133
+ the conditions stated in this License.
134
+
135
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
136
+ any Contribution intentionally submitted for inclusion in the Work
137
+ by You to the Licensor shall be under the terms and conditions of
138
+ this License, without any additional terms or conditions.
139
+ Notwithstanding the above, nothing herein shall supersede or modify
140
+ the terms of any separate license agreement you may have executed
141
+ with Licensor regarding such Contributions.
142
+
143
+ 6. Trademarks. This License does not grant permission to use the trade
144
+ names, trademarks, service marks, or product names of the Licensor,
145
+ except as required for reasonable and customary use in describing the
146
+ origin of the Work and reproducing the content of the NOTICE file.
147
+
148
+ 7. Disclaimer of Warranty. Unless required by applicable law or
149
+ agreed to in writing, Licensor provides the Work (and each
150
+ Contributor provides its Contributions) on an "AS IS" BASIS,
151
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
152
+ implied, including, without limitation, any warranties or conditions
153
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
154
+ PARTICULAR PURPOSE. You are solely responsible for determining the
155
+ appropriateness of using or redistributing the Work and assume any
156
+ risks associated with Your exercise of permissions under this License.
157
+
158
+ 8. Limitation of Liability. In no event and under no legal theory,
159
+ whether in tort (including negligence), contract, or otherwise,
160
+ unless required by applicable law (such as deliberate and grossly
161
+ negligent acts) or agreed to in writing, shall any Contributor be
162
+ liable to You for damages, including any direct, indirect, special,
163
+ incidental, or consequential damages of any character arising as a
164
+ result of this License or out of the use or inability to use the
165
+ Work (including but not limited to damages for loss of goodwill,
166
+ work stoppage, computer failure or malfunction, or any and all
167
+ other commercial damages or losses), even if such Contributor
168
+ has been advised of the possibility of such damages.
169
+
170
+ 9. Accepting Warranty or Additional Liability. While redistributing
171
+ the Work or Derivative Works thereof, You may choose to offer,
172
+ and charge a fee for, acceptance of support, warranty, indemnity,
173
+ or other liability obligations and/or rights consistent with this
174
+ License. However, in accepting such obligations, You may act only
175
+ on Your own behalf and on Your sole responsibility, not on behalf
176
+ of any other Contributor, and only if You agree to indemnify,
177
+ defend, and hold each Contributor harmless for any liability
178
+ incurred by, or claims asserted against, such Contributor by reason
179
+ of your accepting any such warranty or additional liability.
180
+
181
+ END OF TERMS AND CONDITIONS
182
+
183
+ APPENDIX: How to apply the Apache License to your work.
184
+
185
+ To apply the Apache License to your work, attach the following
186
+ boilerplate notice, with the fields enclosed by brackets "[]"
187
+ replaced with your own identifying information. (Don't include
188
+ the brackets!) The text should be enclosed in the appropriate
189
+ comment syntax for the file format. We also recommend that a
190
+ file or class name and description of purpose be included on the
191
+ same "printed page" as the copyright notice for easier
192
+ identification within third-party archives.
193
+
194
+ Copyright 2025 FutureHouse
195
+
196
+ Licensed under the Apache License, Version 2.0 (the "License");
197
+ you may not use this file except in compliance with the License.
198
+ You may obtain a copy of the License at
199
+
200
+ http://www.apache.org/licenses/LICENSE-2.0
201
+
202
+ Unless required by applicable law or agreed to in writing, software
203
+ distributed under the License is distributed on an "AS IS" BASIS,
204
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
205
+ See the License for the specific language governing permissions and
206
+ limitations under the License.
207
+
208
+ Project-URL: issues, https://github.com/Future-House/ether0/issues
209
+ Project-URL: repository, https://github.com/Future-House/ether0
210
+ Classifier: Intended Audience :: Developers
211
+ Classifier: License :: OSI Approved :: Apache Software License
212
+ Classifier: Operating System :: OS Independent
213
+ Classifier: Programming Language :: Python :: 3 :: Only
214
+ Classifier: Programming Language :: Python :: 3.11
215
+ Classifier: Programming Language :: Python :: 3.12
216
+ Classifier: Programming Language :: Python :: 3.13
217
+ Classifier: Programming Language :: Python
218
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
219
+ Classifier: Topic :: Scientific/Engineering :: Chemistry
220
+ Requires-Python: >=3.11
221
+ Description-Content-Type: text/markdown
222
+ License-File: LICENSE
223
+ Requires-Dist: accelerate>=1.10.1
224
+ Requires-Dist: datasets
225
+ Requires-Dist: exmol>=3.3.0
226
+ Requires-Dist: gradio>=5.44.0
227
+ Requires-Dist: httpx
228
+ Requires-Dist: huggingface-hub
229
+ Requires-Dist: molbloom==2.3.4
230
+ Requires-Dist: pydantic>=2
231
+ Requires-Dist: rdkit
232
+ Requires-Dist: regex
233
+ Requires-Dist: spaces>=0.40.1
234
+ Requires-Dist: tenacity
235
+ Provides-Extra: add-tokens
236
+ Requires-Dist: ipykernel; extra == "add-tokens"
237
+ Requires-Dist: ipywidgets>=8; extra == "add-tokens"
238
+ Requires-Dist: transformers>=4.49; extra == "add-tokens"
239
+ Provides-Extra: baselines
240
+ Requires-Dist: fhaviary>=0.19; extra == "baselines"
241
+ Requires-Dist: fhlmi>=0.26; extra == "baselines"
242
+ Requires-Dist: ipython; extra == "baselines"
243
+ Provides-Extra: dev
244
+ Requires-Dist: ether0[add-tokens,typing]; extra == "dev"
245
+ Requires-Dist: huggingface-hub[cli]; extra == "dev"
246
+ Requires-Dist: ipython>=8; extra == "dev"
247
+ Requires-Dist: mypy>=1.8; extra == "dev"
248
+ Requires-Dist: pre-commit>=3.4; extra == "dev"
249
+ Requires-Dist: pylint>=3; extra == "dev"
250
+ Requires-Dist: pytest; extra == "dev"
251
+ Requires-Dist: pytest-subtests; extra == "dev"
252
+ Requires-Dist: pytest-sugar; extra == "dev"
253
+ Requires-Dist: pytest-timer[colorama]; extra == "dev"
254
+ Requires-Dist: pytest-xdist; extra == "dev"
255
+ Requires-Dist: refurb>=2; extra == "dev"
256
+ Requires-Dist: typeguard; extra == "dev"
257
+ Provides-Extra: typing
258
+ Requires-Dist: types-regex; extra == "typing"
259
+ Dynamic: license-file
260
+
261
+ # ether0 Reward Model
262
+
263
+ [![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/Future-House/ether0)
264
+ [![arXiv](https://img.shields.io/badge/arXiv-2506.17238-b31b1b.svg)](https://arxiv.org/abs/2506.17238)
265
+ [![Project Status: Active](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
266
+ ![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)
267
+
268
+ [![Tests](https://github.com/Future-House/ether0/actions/workflows/lint-test.yaml/badge.svg)](https://github.com/Future-House/ether0/actions)
269
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
270
+ [![python](https://img.shields.io/badge/python-3.11+-blue?style=flat&logo=python&logoColor=white)](https://www.python.org)
271
+ [![Model on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/model-on-hf-md-dark.svg)](https://huggingface.co/futurehouse/ether0)
272
+ [![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-md-dark.svg)](https://huggingface.co/datasets/futurehouse/ether0-benchmark)
273
+
274
+ ![ether0 logo](docs/assets/ether0_logo.svg)
275
+
276
+ _ether0: a scientific reasoning model, dataset, and reward functions for chemistry._
277
+
278
+ This repo contains the reward model for evaluating ether0 and similar models,
279
+ along with utilities for working with the verifiable rewards in
280
+ [our benchmark](https://huggingface.co/datasets/futurehouse/ether0-benchmark).
281
+
282
+ ## Overview
283
+
284
+ ether0 is a reasoning language model post-trained through a loop of:
285
+
286
+ 1. Supervised fine-tuning (SFT) on long chain-of-thought reasoning traces,
287
+ to elicit reasoning from a base model.
288
+ 2. Reinforcement learning with verifiable rewards (RLVR)
289
+ to improve reasoning on focused task groups, at their own pace.
290
+ These multitask learned models are referred to as 'specialists'.
291
+ 3. Rejection sampling to filter specialists' reasoning
292
+ for correctness and quality.
293
+ 4. SFT on the base model again to make a 'generalist' reasoning model.
294
+ 5. RLVR to recover any lost performance and push further in an all-task setting.
295
+
296
+ ![ether0 training info](docs/assets/training_info.png)
297
+
298
+ ### Repo Structure
299
+
300
+ This repo contains several packages:
301
+
302
+ - `ether0`: reward functions, `rdkit` data utilities,
303
+ dataset generation prompts, language model training prompts,
304
+ and the associated data models.
305
+ - `ether0.remotes`: server code for ether0 reward functions involving
306
+ exotic packages and/or third party models.
307
+
308
+ > [!NOTE]
309
+ > This repo does not contain training code,
310
+ > although you can find open source repositories like [NeMo-RL](https://github.com/NVIDIA/NeMo-RL)
311
+ > or [Hugging Face TRL](https://github.com/huggingface/trl)
312
+ > that can do the SFT and RL phases of training.
313
+
314
+ ### Open Weights
315
+
316
+ Please see our open-source weights on Hugging Face:
317
+ <https://huggingface.co/futurehouse/ether0>
318
+
319
+ ```python
320
+ from transformers import AutoModelForCausalLM, AutoTokenizer
321
+
322
+ model = AutoModelForCausalLM.from_pretrained("futurehouse/ether0")
323
+ tokenizer = AutoTokenizer.from_pretrained("futurehouse/ether0")
324
+ ```
325
+
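+ As a quick smoke test, you can prompt the model directly. This is a minimal
+ sketch: the example question, `max_new_tokens`, and other generation settings
+ are illustrative rather than recommended defaults.
+
+ ```python
+ messages = [
+     {"role": "user", "content": "Propose a molecule with molecular formula C6H6."}
+ ]
+ input_ids = tokenizer.apply_chat_template(
+     messages, add_generation_prompt=True, return_tensors="pt"
+ )
+ output_ids = model.generate(input_ids, max_new_tokens=512)
+ # Decode only the newly generated tokens
+ print(tokenizer.decode(output_ids[0][input_ids.shape[-1] :]))
+ ```
+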
326
+ ### Open Test Set
327
+
328
+ Please see our open-source benchmark (test set) on Hugging Face:
329
+ <https://huggingface.co/datasets/futurehouse/ether0-benchmark>
330
+
331
+ ```python
332
+ from datasets import load_dataset
333
+
334
+ test_ds = load_dataset("futurehouse/ether0-benchmark", split="test")
335
+ ```
336
+
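+ Each row pairs a `problem` with a serialized `solution` describing which reward
+ function grades it. Here is a minimal sketch of inspecting one row; the fields
+ accessed mirror the schema used in the benchmark example further below.
+
+ ```python
+ from ether0.models import RewardFunctionInfo
+
+ row = test_ds[0]
+ info = RewardFunctionInfo.model_validate(row["solution"])
+ print(row["problem_type"], info.fxn_name)
+ ```
+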
337
+ ## Usage
338
+
339
+ ### Installation
340
+
341
+ The easiest way to get started is a `pip install` from GitHub:
342
+
343
+ ```bash
344
+ pip install git+https://github.com/Future-House/ether0.git
345
+ ```
346
+
347
+ Or, if you want the full setup, clone the repo and use `uv`:
348
+
349
+ ```bash
350
+ git clone https://github.com/Future-House/ether0.git
351
+ cd ether0
352
+ uv sync
353
+ ```
354
+
355
+ ### Reward Functions
356
+
357
+ Here is a basic example of how to use the reward functions:
358
+
359
+ ```python
360
+ from ether0.rewards import valid_mol_eval
361
+
362
+ # Task: provide a valid completion of this molecule
363
+ partial_smiles = "O=C(OC1C(OC(=O)C=2C=CC=CC2)C3(O)C(C)(C)CCCC3(C)C4CC=5OC=CC5C(C)C14"
364
+
365
+ # Here are two model-proposed SMILES completions
366
+ invalid_completion_smiles = "CCC"
367
+ valid_completion_smiles = ")C=6C=CC=CC6"
368
+
369
+ # Evaluate the completions
370
+ assert not valid_mol_eval(invalid_completion_smiles, partial_smiles)
371
+ assert valid_mol_eval(valid_completion_smiles, partial_smiles)
372
+ ```
373
+
374
+ ### Visualization
375
+
376
+ If it helps, you can visualize the molecules:
377
+
378
+ ```python
379
+ from ether0.data import draw_molecule
380
+
381
+ # See above reward functions demo for where these came from
382
+ partial_smiles = "O=C(OC1C(OC(=O)C=2C=CC=CC2)C3(O)C(C)(C)CCCC3(C)C4CC=5OC=CC5C(C)C14"
383
+ invalid_completion_smiles = "CCC"
384
+ valid_completion_smiles = ")C=6C=CC=CC6"
385
+
386
+ valid_mol_text = draw_molecule(partial_smiles + valid_completion_smiles)
387
+ with open("valid_molecule.svg", "w") as f:
388
+ f.write(valid_mol_text)
389
+ ```
390
+
391
+ The output of `draw_molecule` can also be easily visualized using `IPython.display`,
392
+ or in your terminal via `chafa valid_molecule.svg`
393
+ ([chafa docs](https://hpjansson.org/chafa/)).
394
+
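+ For example, in a Jupyter notebook (a minimal sketch reusing the
+ `valid_mol_text` SVG string from above):
+
+ ```python
+ from IPython.display import SVG, display
+
+ display(SVG(valid_mol_text))
+ ```
+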
395
+ ![valid molecule](docs/assets/valid_molecule.svg)
396
+
397
+ ### Benchmark
398
+
399
+ Here is a sample baseline of
400
+ [`ether0-benchmark`](https://huggingface.co/datasets/futurehouse/ether0-benchmark)
401
+ on `gpt-4o` using [`lmi`](https://github.com/Future-House/ldp/tree/main/packages/lmi).
402
+ To install `lmi`, please install `ether0` with the `baselines` extra
403
+ (for example `uv sync --extra baselines`).
404
+
405
+ We also need to run our remote rewards server via `ether0-serve`
406
+ (for more information, see [`ether0.remotes` docs](packages/remotes/README.md)):
407
+
408
+ ```bash
409
+ ETHER0_REMOTES_API_TOKEN=abc123 ether0-serve
410
+ ```
411
+
412
+ Next, start `ipython` with the relevant environment variables set:
413
+
414
+ ```bash
415
+ ETHER0_REMOTES_API_BASE_URL="http://127.0.0.1:8000" ETHER0_REMOTES_API_TOKEN=abc123 \
416
+ ipython
417
+ ```
418
+
419
+ And run the following Python code:
420
+
421
+ ```python
422
+ import itertools
423
+ import statistics
424
+ from collections import defaultdict
425
+
426
+ from aviary.core import Message
427
+ from datasets import load_dataset
428
+ from lmi import LiteLLMModel
429
+ from tqdm.asyncio import tqdm_asyncio as asyncio
430
+
431
+ from ether0.data import get_problem_category
432
+ from ether0.model_prompts import LOOSE_XML_ANSWER_USER_PROMPT, extract_answer_loose
433
+ from ether0.models import RewardFunctionInfo
434
+ from ether0.rewards import EVAL_FUNCTIONS
435
+
436
+ # Add an LLM prompt of your own making to the dataset
437
+ test_ds = load_dataset("futurehouse/ether0-benchmark", split="test").map(
438
+ lambda x: {"prompt": "\n\n".join((LOOSE_XML_ANSWER_USER_PROMPT, x["problem"]))}
439
+ )
440
+
441
+ # Prompt to LLM
442
+ model = LiteLLMModel(name="gpt-4o")
443
+ results = await asyncio.gather(
444
+ *(model.acompletion([Message(content=row["prompt"])]) for row in test_ds),
445
+ desc="Running evaluation",
446
+ )
447
+
448
+ # Compute rewards
449
+ per_category_rewards = defaultdict(list)
450
+ for row, result in zip(test_ds, results, strict=True):
451
+ # NOTE: you can also use `ether0.rewards.accuracy_reward`,
452
+ # but we decided to go a bit "lower level" for this demo
453
+ reward_info = RewardFunctionInfo.model_validate(row["solution"])
454
+ yhat = extract_answer_loose(result[0].text)
455
+ reward = EVAL_FUNCTIONS[reward_info.fxn_name](
456
+ yhat=yhat, y=reward_info.answer_info, test=True
457
+ )
458
+ per_category_rewards[get_problem_category(reward_info.problem_type)].append(reward)
459
+
460
+ for category, rewards in sorted(per_category_rewards.items()):
461
+ print(
462
+ f"In category {category!r} of {len(rewards)} questions,"
463
+ f" average reward was {statistics.mean(rewards):.3f}."
464
+ )
465
+ accuracy = statistics.mean(itertools.chain.from_iterable(per_category_rewards.values()))
466
+ print(f"Cumulative average reward across {len(test_ds)} questions was {accuracy:.3f}.")
467
+ ```
src/ether0.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,46 @@
1
+ .gitignore
2
+ .pre-commit-config.yaml
3
+ .python-version
4
+ CITATION.cff
5
+ LICENSE
6
+ README.md
7
+ pyproject.toml
8
+ uv.lock
9
+ .github/renovate.json5
10
+ .github/workflows/lint-test.yaml
11
+ docs/adding_tokens.ipynb
12
+ docs/updated_mistral_chat_template.jinja
13
+ docs/assets/ether0_logo.svg
14
+ docs/assets/training_info.png
15
+ docs/assets/valid_molecule.svg
16
+ packages/remotes/.gitignore
17
+ packages/remotes/LICENSE
18
+ packages/remotes/README.md
19
+ packages/remotes/pyproject.toml
20
+ packages/remotes/src/ether0/server.py
21
+ packages/remotes/tests/conftest.py
22
+ packages/remotes/tests/test_clients.py
23
+ packages/remotes/tests/test_rewards.py
24
+ src/ether0/chat.py
25
+ src/ether0/clients.py
26
+ src/ether0/data.py
27
+ src/ether0/fingerprints.bloom
28
+ src/ether0/model_prompts.py
29
+ src/ether0/models.py
30
+ src/ether0/problem_prompts.py
31
+ src/ether0/py.typed
32
+ src/ether0/rewards.py
33
+ src/ether0/rings.bloom
34
+ src/ether0/utils.py
35
+ src/ether0.egg-info/PKG-INFO
36
+ src/ether0.egg-info/SOURCES.txt
37
+ src/ether0.egg-info/dependency_links.txt
38
+ src/ether0.egg-info/requires.txt
39
+ src/ether0.egg-info/top_level.txt
40
+ tests/conftest.py
41
+ tests/test_chat.py
42
+ tests/test_data.py
43
+ tests/test_model_prompts.py
44
+ tests/test_models.py
45
+ tests/test_rewards.py
46
+ tests/test_utils.py
src/ether0.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
1
+
src/ether0.egg-info/requires.txt ADDED
@@ -0,0 +1,40 @@
1
+ accelerate>=1.10.1
2
+ datasets
3
+ exmol>=3.3.0
4
+ gradio>=5.44.0
5
+ httpx
6
+ huggingface-hub
7
+ molbloom==2.3.4
8
+ pydantic>=2
9
+ rdkit
10
+ regex
11
+ spaces>=0.40.1
12
+ tenacity
13
+
14
+ [add-tokens]
15
+ ipykernel
16
+ ipywidgets>=8
17
+ transformers>=4.49
18
+
19
+ [baselines]
20
+ fhaviary>=0.19
21
+ fhlmi>=0.26
22
+ ipython
23
+
24
+ [dev]
25
+ ether0[add-tokens,typing]
26
+ huggingface-hub[cli]
27
+ ipython>=8
28
+ mypy>=1.8
29
+ pre-commit>=3.4
30
+ pylint>=3
31
+ pytest
32
+ pytest-subtests
33
+ pytest-sugar
34
+ pytest-timer[colorama]
35
+ pytest-xdist
36
+ refurb>=2
37
+ typeguard
38
+
39
+ [typing]
40
+ types-regex
src/ether0.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
1
+ ether0
src/ether0/chat.py ADDED
@@ -0,0 +1,177 @@
1
+ import re
2
+ from collections.abc import Callable, MutableMapping
3
+ from dataclasses import dataclass, field
4
+ from functools import wraps
5
+ from itertools import starmap
6
+ from typing import Any, ParamSpec, TypeVar, cast
7
+
8
+ from ether0.model_prompts import (
9
+ ANSWER_END,
10
+ ANSWER_START,
11
+ THINK_END,
12
+ THINK_START,
13
+ ProblemPrompt,
14
+ SysPrompt,
15
+ extract_answer_loose,
16
+ )
17
+ from ether0.rewards import accuracy_reward, format_reward
18
+
19
+ P = ParamSpec("P")
20
+ R = TypeVar("R")
21
+
22
+
23
+ def wrap_reward_func(func: Callable[P, R], **wrap_kwargs: Any) -> Callable[P, R]:
24
+ @wraps(func) # needed by GRPOTrainer for logging
25
+ def wrapped(*args: P.args, **kwargs: P.kwargs) -> R:
26
+ return func(*args, **wrap_kwargs, **kwargs)
27
+
28
+ return wrapped
29
+
30
+
31
+ @dataclass
32
+ class ChatArguments:
33
+ """Arguments for making a chat conversation for SFT or RL training."""
34
+
35
+ sys_prompt: SysPrompt | None = field(
36
+ default=None,
37
+ metadata={
38
+ "help": (
39
+ "If provided, use this system prompt. If not provided, the chat"
40
+ " template may inject one."
41
+ )
42
+ },
43
+ )
44
+
45
+ problem_prompt: ProblemPrompt = field(
46
+ default=ProblemPrompt.NONE,
47
+ metadata={
48
+ "help": (
49
+ "Prompt to put before the problem in the first user message, relevant"
50
+ " for both RL or SFT. Make sure this matches between SFT and RL, so if"
51
+ " the SFT'd model wasn't passed this during SFT, don't pass this to RL."
52
+ )
53
+ },
54
+ )
55
+
56
+ reasoning: bool = field(
57
+ default=True,
58
+ metadata={
59
+ "help": (
60
+ "If True (default), it is assumed that the model's response contains"
61
+ f" reasoning enclosed in `{THINK_START}` and `{THINK_END}`."
62
+ )
63
+ },
64
+ )
65
+
66
+ def make_rl_conversation(
67
+ self, row: MutableMapping[str, str | list[str]]
68
+ ) -> dict[str, list[dict] | list[list[dict]]]:
69
+ """Format a dataset row into a chat-like conversation structure.
70
+
71
+ This will add a `prompt` key to the dataset. Unlike make_sft_conversation,
72
+ the answer will not be included.
73
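+
+ Illustrative example: with no system prompt and the default problem prompt,
+ {"problem": "Name this molecule: CCO"} maps to
+ {"prompt": [{"role": "user", "content": "Name this molecule: CCO"}]}.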
+ """
74
+ if not self.sys_prompt:
75
+ msgs: list[dict] = []
76
+ else:
77
+ msgs = [{
78
+ "role": "system",
79
+ "content": SysPrompt(self.sys_prompt).get_sys_prompt(),
80
+ }]
81
+ problem_prompt = ProblemPrompt(self.problem_prompt).get_prompt()
82
+ if problem_prompt:
83
+ problem_prompt += "\n\n"
84
+
85
+ def add_user(problem: str) -> list[dict]:
86
+ return [*msgs, {"role": "user", "content": problem_prompt + problem}]
87
+
88
+ if isinstance(row["problem"], str): # Single
89
+ all_msgs: list[dict] | list[list[dict]] = add_user(row["problem"])
90
+ else: # Batched
91
+ all_msgs = [add_user(p) for p in row["problem"]]
92
+ return {"prompt": all_msgs}
93
+
94
+ def make_sft_conversation(
95
+ self, row: MutableMapping[str, str | list[str]]
96
+ ) -> dict[str, list[dict] | list[list[dict]]]:
97
+ """Format a dataset row into a chat-like conversation structure.
98
+
99
+ This will add a `messages` key to the dataset.
100
+ """
101
+ if (
102
+ self.reasoning
103
+ and ProblemPrompt(self.problem_prompt) == ProblemPrompt.ANSWER
104
+ ):
105
+ raise ValueError(
106
+ "It does not make sense to include reasoning in the SFT traces,"
107
+ " but then only prompt about answer XML (without thoughts)."
108
+ )
109
+
110
+ def add_assistant(
111
+ raw_answer: str, thought: str, prior_msgs: list[dict]
112
+ ) -> list[dict]:
113
+ if re.search(r"<\/answer>", raw_answer):
114
+ # Remove prelude and postlude plus XML tags,
115
+ # because an OpenRouter-hosted DeepSeek R1 can give answer
116
+ # with a prelude and XML tags, but our training expects just an answer
117
+ # > The reaction involves sodium borohydride ([BH4-].[Na+]), <redacted>.
118
+ # > Under these conditions, <redacted>.
119
+ # > <answer>N1(CCOCC1)C1=CC=C(C(O))C=C1</answer>
120
+ answer = extract_answer_loose(raw_answer)
121
+ if not answer:
122
+ raise ValueError(
123
+ "Failed to extract just the answer from the answer"
124
+ f" {raw_answer!r}."
125
+ )
126
+ else:
127
+ answer = raw_answer
128
+
129
+ return [
130
+ *prior_msgs,
131
+ {
132
+ "role": "assistant",
133
+ "content": (
134
+ (f"{THINK_START}{thought}{THINK_END}" if self.reasoning else "")
135
+ + f"{ANSWER_START}{answer}{ANSWER_END}"
136
+ ),
137
+ },
138
+ ]
139
+
140
+ # The first part will be the same as the RL conversation
141
+ msgs = self.make_rl_conversation(row)["prompt"]
142
+ # Now add the answer, with optional thinking
143
+ if isinstance(row["problem"], str): # Single
144
+ all_msgs: list[dict] | list[list[dict]] = add_assistant(
145
+ cast(str, row["answer"]),
146
+ cast(str, row["thought"]),
147
+ cast(list[dict], msgs),
148
+ )
149
+ else: # Batched
150
+ all_msgs = list(
151
+ starmap(
152
+ add_assistant, zip(row["answer"], row["thought"], msgs, strict=True)
153
+ )
154
+ )
155
+ return {"messages": all_msgs}
156
+
157
+ def get_reward_funcs(
158
+ self,
159
+ format_reward_value: float = 1.0,
160
+ soft: bool = False,
161
+ test: bool = False,
162
+ good_molecule_bonus: float = 0.0,
163
+ ) -> list[Callable]:
164
+ return [
165
+ wrap_reward_func(
166
+ format_reward,
167
+ reasoning=self.reasoning,
168
+ reward=format_reward_value,
169
+ ),
170
+ wrap_reward_func(
171
+ accuracy_reward,
172
+ reasoning=self.reasoning,
173
+ soft=soft,
174
+ test=test,
175
+ good_molecule_bonus=good_molecule_bonus,
176
+ ),
177
+ ]
src/ether0/clients.py ADDED
@@ -0,0 +1,163 @@
1
+ import logging
2
+ import os
3
+ from collections import Counter
4
+ from collections.abc import Mapping
5
+ from typing import Any
6
+
7
+ import httpx
8
+ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ BASE_URL = os.environ.get("ETHER0_REMOTES_API_BASE_URL")
13
+ HEADERS = {
14
+ "Authorization": f"Bearer {os.environ.get('ETHER0_REMOTES_API_TOKEN')}",
15
+ "Content-Type": "application/json",
16
+ }
17
+ SERVER_ERRORS_COUNTER = Counter({
18
+ "fetch_solubility": 0,
19
+ "fetch_purchasable": 0,
20
+ "fetch_forward_rxn": 0,
21
+ "fetch_rxn_info": 0,
22
+ })
23
+ THROW_500_ERROR_THRESHOLD = int(
24
+ os.environ.get("ETHER0_REMOTES_THROW_500_ERROR_THRESHOLD", "100")
25
+ )
26
+ # If our server throws a 501, we don't retry
27
+ OUR_SERVER_DONT_RETRY_CODE = httpx.codes.NOT_IMPLEMENTED.value
28
+ REMOTE_WORKER_COLD_START_TIME = 180 # sec
29
+
30
+
31
+ class RetryableServerError(Exception):
32
+ """Retryable server error."""
33
+
34
+ @classmethod
35
+ def check_raise(
36
+ cls, response: httpx.Response, kwargs: Mapping[str, Any] | None = None
37
+ ) -> None:
38
+ if (
39
+ response.is_server_error
40
+ and response.status_code != OUR_SERVER_DONT_RETRY_CODE
41
+ ):
42
+ raise cls(
43
+ f"Retryable server error with status code {response.status_code}"
44
+ f" and inputs {kwargs or {}} and response {response=}."
45
+ )
46
+
47
+
48
+ @retry(
49
+ stop=stop_after_attempt(3),
50
+ wait=wait_fixed(1),
51
+ retry=retry_if_exception_type((
52
+ httpx.ReadTimeout,
53
+ httpx.ConnectError,
54
+ RetryableServerError,
55
+ )),
56
+ )
57
+ def fetch_solubility(query_smiles: str) -> dict:
58
+ response = httpx.post(
59
+ f"{BASE_URL}/compute_solubility",
60
+ json={"smiles": query_smiles},
61
+ headers=HEADERS,
62
+ timeout=REMOTE_WORKER_COLD_START_TIME,
63
+ )
64
+
65
+ error_message = ""
66
+ if response.is_success:
67
+ result = response.json()
68
+ if "error" in result:
69
+ error_message = result["error"]
70
+ else:
71
+ solubility = result["mean"]
72
+ return {"smiles": query_smiles, "solubility": solubility}
73
+ if response.is_redirect or response.is_server_error:
74
+ # We should not have redirect responses or server errors, so let's retry these
75
+ error_message = response.text
76
+ SERVER_ERRORS_COUNTER["fetch_solubility"] += 1
77
+ if SERVER_ERRORS_COUNTER["fetch_solubility"] >= THROW_500_ERROR_THRESHOLD:
78
+ response.raise_for_status()
79
+ RetryableServerError.check_raise(
80
+ response, kwargs={"query_smiles": query_smiles}
81
+ )
82
+ if error_message:
83
+ logger.warning(
84
+ f"fetch_solubility did not succeed on {query_smiles=} with"
85
+ f" {response=} and {error_message=}."
86
+ )
87
+ return {
88
+ "smiles": query_smiles,
89
+ "error": f"API error: {response} - {error_message}",
90
+ }
91
+
92
+
93
+ @retry(
94
+ stop=stop_after_attempt(3),
95
+ wait=wait_fixed(1),
96
+ retry=retry_if_exception_type((
97
+ httpx.ReadTimeout,
98
+ httpx.ConnectError,
99
+ RetryableServerError,
100
+ )),
101
+ )
102
+ def fetch_purchasable(query_smiles_list: list[str] | str) -> dict[str, bool]:
103
+ response = httpx.post(
104
+ f"{BASE_URL}/is_purchasable",
105
+ json={"smiles": query_smiles_list},
106
+ headers=HEADERS,
107
+ timeout=REMOTE_WORKER_COLD_START_TIME,
108
+ )
109
+
110
+ if response.is_success:
111
+ return response.json()
112
+ logger.warning(
113
+ f"fetch_purchasable did not succeed on {query_smiles_list=} with"
114
+ f" {response=} and {response.text=}."
115
+ )
116
+ if response.is_redirect or response.is_server_error:
117
+ # We should not have redirect responses or server errors, so let's retry these
118
+ SERVER_ERRORS_COUNTER["fetch_purchasable"] += 1
119
+ if SERVER_ERRORS_COUNTER["fetch_purchasable"] >= THROW_500_ERROR_THRESHOLD:
120
+ response.raise_for_status()
121
+ RetryableServerError.check_raise(
122
+ response, kwargs={"query_smiles_list": query_smiles_list}
123
+ )
124
+ return {}
125
+
126
+
127
+ @retry(
128
+ stop=stop_after_attempt(3),
129
+ wait=wait_fixed(1),
130
+ retry=retry_if_exception_type((
131
+ httpx.ReadTimeout,
132
+ httpx.ConnectError,
133
+ RetryableServerError,
134
+ )),
135
+ )
136
+ def fetch_forward_rxn(query_rxn_smiles: str) -> dict[str, str]:
137
+ response = httpx.post(
138
+ f"{BASE_URL}/translate",
139
+ json={"reaction": query_rxn_smiles},
140
+ headers=HEADERS,
141
+ timeout=REMOTE_WORKER_COLD_START_TIME,
142
+ )
143
+
144
+ if response.is_success:
145
+ result = response.json()
146
+ product = result["product"]
147
+ return {"smiles": query_rxn_smiles, "product": product}
148
+ logger.warning(
149
+ f"fetch_forward_rxn did not succeed on {query_rxn_smiles=} with"
150
+ f" {response=} and {response.text=}."
151
+ )
152
+ if response.is_redirect or response.is_server_error:
153
+ # We should not have redirect responses or server errors, so let's retry these
154
+ SERVER_ERRORS_COUNTER["fetch_forward_rxn"] += 1
155
+ if SERVER_ERRORS_COUNTER["fetch_forward_rxn"] >= THROW_500_ERROR_THRESHOLD:
156
+ response.raise_for_status()
157
+ RetryableServerError.check_raise(
158
+ response, kwargs={"query_rxn_smiles": query_rxn_smiles}
159
+ )
160
+ return {
161
+ "smiles": query_rxn_smiles,
162
+ "error": f"API error: {response} - {response.text}",
163
+ }
src/ether0/data.py ADDED
@@ -0,0 +1,225 @@
1
+ import logging
2
+ import re
3
+ from collections.abc import Collection, Mapping
4
+ from pathlib import Path
5
+
6
+ from datasets import Dataset
7
+ from molbloom import BloomFilter, canon
8
+ from rdkit import Chem
9
+ from rdkit.Chem.Draw import MolDraw2D, MolDraw2DSVG # pylint: disable=no-name-in-module
10
+ from rdkit.Chem.Draw.rdMolDraw2D import MolDraw2DCairo
11
+ from rdkit.Chem.rdChemReactions import ( # pylint: disable=no-name-in-module
12
+ ReactionFromSmarts,
13
+ )
14
+ from rdkit.Chem.rdDepictor import ( # pylint: disable=no-name-in-module
15
+ Compute2DCoords,
16
+ StraightenDepiction,
17
+ )
18
+ from rdkit.Chem.rdMolDescriptors import ( # pylint: disable=no-name-in-module
19
+ GetMorganFingerprint,
20
+ )
21
+ from rdkit.Chem.rdmolfiles import MolFromSmiles # pylint: disable=no-name-in-module
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ PROBLEM_CATEGORY_TO_NICKNAME: Mapping[str, str] = {
27
+ "functional-group": "functional group",
28
+ "molecule-caption": "molecule caption",
29
+ "molecule-completion": "SMILES completion",
30
+ "molecule-formula": "elucidation",
31
+ "molecule-name": "IUPAC name",
32
+ "oracle-solubility": "solubility edit",
33
+ "property": "multiple choice",
34
+ "property-cat-brain": "BBB permeability",
35
+ "property-cat-eve": "Human receptor binding",
36
+ "property-cat-safety": "safety",
37
+ "property-cat-smell": "scent",
38
+ "property-regression-pka": "pKa",
39
+ "property-regression-ld50": "LD50",
40
+ "property-regression-adme": "ADME",
41
+ "reaction-prediction": "reaction prediction",
42
+ "retro-synthesis": "retrosynthesis",
43
+ "simple-formula": "molecular formula",
44
+ "property-regression-adme/log_hlm_clint": "log of HLM CL$_{\\text{int}}$",
45
+ "property-regression-adme/log_mdr1-mdck_er": "log of MDR1-MDCK ER",
46
+ "property-regression-adme/log_rlm_clint": "log of RLM CL$_{\\text{int}}$",
47
+ "property-regression-adme/log_solubility": "log of aqueous solubility",
48
+ }
49
+
50
+
51
+ def get_problem_type(row: Mapping[str, str]) -> str:
52
+ return row.get("problem_type") or row["type"]
53
+
54
+
55
+ def get_problem_category(problem_type: str | None) -> str:
56
+ return (problem_type or "").split("/", maxsplit=1)[0]
57
+
58
+
59
+ def get_problem_categories_from_datasets(*datasets: Dataset) -> Collection[str]:
60
+ return {
61
+ get_problem_category(pt)
62
+ for dataset in datasets
63
+ for pt in (dataset.hf_dataset if hasattr(dataset, "hf_dataset") else dataset)[
64
+ "problem_type"
65
+ ]
66
+ }
67
+
68
+
69
+ # Use this regex with findall to extract SMILES strings from text.
70
+ # Note this function currently fails on counterions e.g.
71
+ # Cc1ccc(-c2ccc3c(c2)c2ccccc2c[n+]3C)cc1.[Cl-]
72
+ SMILES_PATTERN = re.compile(
73
+ r"(?<!\w)(?:(?:Cl|Br|[BCNOPSFIC]|[cnops]|\[[^\]]+?\]|[0-9@+\-=#\\/()%])){4,}(?!\w)"
74
+ )
75
+
76
+
77
+ def make_sized_d2d(w: int = 400, h: int = 300) -> MolDraw2DCairo:
78
+ return MolDraw2DCairo(w, h)
79
+
80
+
81
+ def draw_molecule(
82
+ smiles: str, bg_opacity: float = 1.0, d2d: MolDraw2D | None = None
83
+ ) -> str:
84
+ """Draw a SMILES molecule and return the drawing string."""
85
+ mol = Chem.MolFromSmiles(smiles)
86
+ if mol is None:
87
+ raise ValueError(f"Failed to convert {smiles=} to a molecule.")
88
+ Compute2DCoords(mol)
89
+ StraightenDepiction(mol)
90
+ if d2d is None:
91
+ d2d = MolDraw2DSVG(-1, -1)
92
+ dopts = d2d.drawOptions()
93
+ dopts.useBWAtomPalette()
94
+ dopts.setBackgroundColour((*dopts.getBackgroundColour(), bg_opacity))
95
+ d2d.DrawMolecule(mol)
96
+ d2d.FinishDrawing()
97
+ return d2d.GetDrawingText()
98
+
99
+
100
+ def draw_reaction(
101
+ rxn_smiles: str, bg_opacity: float = 1.0, d2d: MolDraw2D | None = None
102
+ ) -> str:
103
+ rxn = ReactionFromSmarts(rxn_smiles, useSmiles=True)
104
+ if d2d is None:
105
+ d2d = MolDraw2DSVG(-1, -1)
106
+ dopts = d2d.drawOptions()
107
+ dopts.useBWAtomPalette()
108
+ dopts.setBackgroundColour((*dopts.getBackgroundColour(), bg_opacity))
109
+ d2d.DrawReaction(rxn)
110
+ d2d.FinishDrawing()
111
+ return d2d.GetDrawingText()
112
+
113
+
114
+ # Precompiled SMARTS patterns for protected bonds and ring atoms
115
+ _ring_db_pat = Chem.MolFromSmarts("[#6R,#16R]=[OR0,SR0,CR0,NR0]")
116
+ _ring_atom_pat = Chem.MolFromSmarts("[R]")
117
+
118
+
119
+ bloom_filters: dict[str, BloomFilter] = {}
120
+
121
+
122
+ def _get_bits(mol: Chem.Mol) -> set[str]:
123
+ """Get the fingerprint bits from a molecule."""
124
+ # the keys are the actual bits
125
+ bi: dict[int, tuple[tuple[int, int], ...]] = {}
126
+ GetMorganFingerprint(mol, 2, bitInfo=bi) # type: ignore[arg-type]
127
+ return {str(k) for k in bi}
128
+
129
+
130
+ ETHER0_DIR = Path(__file__).parent
131
+
132
+
133
+ def _get_bloom_filter(name: str) -> BloomFilter:
134
+ if name in bloom_filters:
135
+ return bloom_filters[name]
136
+ bloom_filters[name] = BloomFilter(str(ETHER0_DIR / f"{name}.bloom"))
137
+ return bloom_filters[name]
138
+
139
+
140
+ def get_ring_system(mol: Chem.Mol) -> list[str]:
141
+ """
142
+ Extracts ring systems from an RDKit molecule and returns a list of SMILES.
143
+ Bonds not in rings and not protected (e.g., ring carbonyls) are cleaved.
144
+
145
+ Source: https://github.com/PatWalters/useful_rdkit_utils/blob/edb126e3fd71870ae2d1c9440b904106e3ef97a2/useful_rdkit_utils/ring_systems.py#L13
146
+ Which has a MIT license, copyright 2021-2025 PatWalters.
147
+ """ # noqa: D205
148
+ # Copy to avoid mutating original
149
+ mol = Chem.Mol(mol)
150
+
151
+ # Tag protected bonds
152
+ for bond in mol.GetBonds():
153
+ bond.SetBoolProp("protected", False) # noqa: FBT003
154
+ for a1, a2 in mol.GetSubstructMatches(_ring_db_pat):
155
+ b = mol.GetBondBetweenAtoms(a1, a2)
156
+ b.SetBoolProp("protected", True) # noqa: FBT003
157
+
158
+ # Cleave linker bonds
159
+ cleave_idxs = [
160
+ b.GetIdx()
161
+ for b in mol.GetBonds()
162
+ if not b.IsInRing()
163
+ and not b.GetBoolProp("protected")
164
+ and b.GetBondType() == Chem.BondType.SINGLE
165
+ ]
166
+ if cleave_idxs:
167
+ frag_mol = Chem.FragmentOnBonds(mol, cleave_idxs)
168
+ Chem.SanitizeMol(frag_mol)
169
+ else:
170
+ frag_mol = mol
171
+
172
+ # Split into fragments and clean up
173
+ ring_smiles: list[str] = []
174
+ for frag in Chem.GetMolFrags(frag_mol, asMols=True):
175
+ if frag.HasSubstructMatch(_ring_atom_pat):
176
+ for atom in frag.GetAtoms():
177
+ if atom.GetAtomicNum() == 0:
178
+ atom.SetAtomicNum(1)
179
+ atom.SetIsotope(0)
180
+ frag = Chem.RemoveAllHs(frag) # noqa: PLW2901
181
+ # Fix stereo on terminal double bonds
182
+ for bd in frag.GetBonds():
183
+ if bd.GetBondType() == Chem.BondType.DOUBLE and (
184
+ 1 in {bd.GetBeginAtom().GetDegree(), bd.GetEndAtom().GetDegree()}
185
+ ):
186
+ bd.SetStereo(Chem.BondStereo.STEREONONE)
187
+ ring_smiles.append(Chem.MolToSmiles(frag))
188
+
189
+ return ring_smiles
190
+
191
+
192
+ def is_reasonable_ring_system(mol: Chem.Mol, ref_mol: Chem.Mol | None = None) -> bool:
193
+ """
194
+ Check if a molecule has a reasonable ring system.
195
+
196
+ Either the molecule has no rings, or every ring system is found in the known-rings Bloom filter.
197
+ If a reference molecule is provided, its ring systems are assumed valid.
198
+ """
199
+ bloom_filter = _get_bloom_filter("rings")
200
+ ring_systems = [canon(r) for r in get_ring_system(mol)]
201
+ # remove from consideration all rings in ref_mol, since we'll always assume they're correct
202
+ if ref_mol:
203
+ ref_ring_systems = [canon(r) for r in get_ring_system(ref_mol)]
204
+ ring_systems = [ring for ring in ring_systems if ring not in ref_ring_systems]
205
+ return all((r in bloom_filter) for r in ring_systems)
206
+
207
+
208
+ def is_reasonable_fp(mol: Chem.Mol, ref_mol: Chem.Mol | None = None) -> bool:
209
+ """
210
+ Check if a molecule has a reasonable fingerprint.
211
+
212
+ If a reference molecule is provided, its fingerprint bits are assumed valid.
213
+ """
214
+ bloom_filter = _get_bloom_filter("fingerprints")
215
+ bits: Collection[str] = _get_bits(mol)
216
+ # remove from consideration all fingerprint bits in ref_mol, since we'll always assume they're correct
217
+ if ref_mol:
218
+ ref_bits = _get_bits(ref_mol)
219
+ bits = [bit for bit in bits if bit not in ref_bits]
220
+ return all((b in bloom_filter) for b in bits)
221
+
222
+
223
+ def mol_from_smiles(smiles: str, *args, **kwargs) -> Chem.Mol | None:
224
+ """MolFromSmiles is type-hinted to always return Mol, but can return None."""
225
+ return MolFromSmiles(smiles, *args, **kwargs)
src/ether0/fingerprints.bloom ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9267f986c512f26131eb0219170c12e07ded9c1574c75233da5e3d5f3f00278
3
+ size 12500018
src/ether0/model_prompts.py ADDED
@@ -0,0 +1,142 @@
1
+ """Prompts and utilities used for training the ether0 model."""
2
+
3
+ import re
4
+ from enum import Enum, StrEnum
5
+ from typing import assert_never
6
+
7
+ # Tokens to surround reasoning and answer in XML format
8
+ THINK_START = "<|think_start|>"
9
+ THINK_END = "<|think_end|>"
10
+ ANSWER_START = "<|answer_start|>"
11
+ ANSWER_END = "<|answer_end|>"
12
+
13
+
14
+ # Keys: True (reasoning + answer), False (answer only)
15
+ # Use strict regex for ether0 models, as we can SFT or RL the models into compliance
16
+ STRICT_XML_ANSWER_SPLIT_PATTERNS: dict[bool, re.Pattern] = {
17
+ True: re.compile(
18
+ rf"^\s?{re.escape(THINK_START)}\s*([\s\S]*?)\s*{re.escape(THINK_END)}([\s\S]*?){re.escape(ANSWER_START)}\s*([\s\S]*?)\s*{re.escape(ANSWER_END)}$"
19
+ ),
20
+ False: re.compile(
21
+ rf"^\s?{re.escape(ANSWER_START)}\s*(\S[\s\S]*?)\s*{re.escape(ANSWER_END)}$"
22
+ ),
23
+ }
24
+ # Use loose regex for other models because:
25
+ # 1. <think> may be out-of-distribution from the model's training data,
26
+ # so requiring thoughts may degrade performance.
27
+ # 2. We allow baseline models to add extra whitespace and/or preceding or trailing text
28
+ # around answer XML, again to maximize performance.
29
+ # 3. Similarly, we allow models to ramble for a bit mentioning <answer>,
30
+ # and then we just keep the last <answer> XML.
31
+ # 4. We want to avoid prompt engineering tricks to get around the previous items.
32
+ LOOSE_XML_ANSWER_LOOSE_PATTERN = r"<answer>\s*(\S[\s\S]*?)\s*<\/answer>"
33
+
34
+
35
+ class XMLAnswerPrompts(StrEnum):
36
+ """Enum of prompts to use ."""
37
+
38
+ REASONING_ANSWER = (
39
+ "A conversation between User and Assistant."
40
+ " The user asks a question, and the Assistant solves it."
41
+ " The assistant first thinks about the reasoning process"
42
+ " in the mind and then provides the user with the answer."
43
+ " The reasoning process and answer are enclosed within"
44
+ f" {THINK_START} {THINK_END} and {ANSWER_START} {ANSWER_END} tags,"
45
+ " respectively, i.e.,"
46
+ f" {THINK_START} reasoning process here {THINK_END}"
47
+ f"{ANSWER_START} answer here {ANSWER_END}"
48
+ )
49
+ ANSWER_ONLY = (
50
+ "A conversation between User and Assistant."
51
+ " The user asks a question, and the Assistant solves it."
52
+ " The assistant encloses its answer within"
53
+ f" {ANSWER_START} {ANSWER_END} tags, i.e.,"
54
+ f" {ANSWER_START} answer here {ANSWER_END}"
55
+ )
56
+
57
+ @property
58
+ def pattern(self) -> re.Pattern:
59
+ return STRICT_XML_ANSWER_SPLIT_PATTERNS[
60
+ self == XMLAnswerPrompts.REASONING_ANSWER
61
+ ]
62
+
63
+
64
+ class SysPrompt(Enum): # Use Enum over StrEnum for trl.TrlParser compatibility
65
+ """Possible system prompts for making a conversation to train upon."""
66
+
67
+ SCIENTIFIC_AI = "scientific_ai"
68
+
69
+ def get_sys_prompt(self) -> str:
70
+ match self:
71
+ case SysPrompt.SCIENTIFIC_AI:
72
+ return "You are a scientific reasoning AI assistant."
73
+ case _:
74
+ assert_never(self)
75
+
76
+
77
+ class ProblemPrompt(Enum): # Use Enum over StrEnum for trl.TrlParser compatibility
78
+ """Possible user prompts for making a conversation to train upon."""
79
+
80
+ NONE = "none"
81
+ THINK_ANSWER = "think_answer"
82
+ ANSWER = "answer"
83
+
84
+ def get_prompt(self) -> str:
85
+ match self:
86
+ case ProblemPrompt.NONE:
87
+ return ""
88
+ case ProblemPrompt.THINK_ANSWER:
89
+ return XMLAnswerPrompts.REASONING_ANSWER.value
90
+ case ProblemPrompt.ANSWER:
91
+ return XMLAnswerPrompts.ANSWER_ONLY.value
92
+ case _:
93
+ assert_never(self)
94
+
95
+
96
+ def extract_thought_answer_strict(
97
+ text: str, reasoning: bool
98
+ ) -> tuple[str | None, str | None]:
99
+ """Extract thought and answer from text using a strict XML pattern."""
100
+ # Use `maxsplit=1` to enforce just one match
101
+ matches = STRICT_XML_ANSWER_SPLIT_PATTERNS[reasoning].split(text, maxsplit=1)
102
+ try:
103
+ _, *inner, suffix = matches
104
+ except (IndexError, ValueError):
105
+ return None, None # Consider no answer or 2+ answers as a failure
106
+ if reasoning:
107
+ thought, inter, answer = inner
108
+ else:
109
+ thought, inter = None, None
110
+ (answer,) = inner
111
+ if (
112
+ THINK_START not in (thought or "")
113
+ and THINK_START not in (inter or "")
114
+ and ANSWER_START not in answer
115
+ and not suffix
116
+ ):
117
+ return thought, answer or None
118
+ return None, None # Consider nested answer as a failure
119
+
120
+
121
+ LOOSE_XML_ANSWER_USER_PROMPT = (
122
+ "When answering,"
123
+ " be sure to place the final answer as"
124
+ " SMILES notation into XML tags <answer></answer>."
125
+ " An example is <answer>CCO</answer>."
126
+ )
127
+
128
+
129
+ def extract_answer_loose(text: str | None) -> str:
130
+ """
131
+ Extract the answer from text using a loose XML pattern.
132
+
133
+ SEE: LOOSE_XML_ANSWER_LOOSE_PATTERN for when to use this.
134
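+
+ Illustrative example: extract_answer_loose("I think <answer>CCO</answer>")
+ returns "CCO"; text without <answer> tags returns "".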
+ """
135
+ matches = re.findall(LOOSE_XML_ANSWER_LOOSE_PATTERN, text or "")
136
+ try:
137
+ last_answer = matches[-1] # Last answer in the response
138
+ except IndexError:
139
+ return "" # Consider no answer as a failure
140
+ if "<answer>" not in last_answer:
141
+ return last_answer
142
+ return "" # Consider nested answer as a failure
src/ether0/models.py ADDED
@@ -0,0 +1,173 @@
1
+ import re
2
+ from collections.abc import Collection
3
+ from enum import StrEnum, auto
4
+ from typing import Any
5
+
6
+ from datasets import DatasetDict
7
+ from pydantic import BaseModel, Field, model_validator
8
+
9
+ from ether0.utils import TDataset
10
+
11
+ REWARD_REASON_KEY = "reward_reason" # Sentinel key
12
+
13
+
14
+ class RewardReason(StrEnum):
15
+ FORMAT_FAILED = auto()
16
+ INVALID_MOL = auto()
17
+ # Catch-all for invalid values that aren't a molecule or a reaction
18
+ INVALID_VALUE = auto()
19
+
20
+ # Oracle regression values
21
+ WRONG_NUMERICAL_ANSWER = auto()
22
+
23
+ # Reaction/retro-synthesis failures
24
+ INVALID_RXN = auto()
25
+ WRONG_PRODUCT = auto()
26
+ PRODUCT_IS_REACTANT = auto()
27
+ NOT_PURCHASABLE = auto()
28
+
29
+ # Molecule formula/functional group failures
30
+ WRONG_FORMULA = auto()
31
+ FAILED_CONSTRAINT = auto()
32
+
33
+ # Unreasonable molecules
34
+ FAILED_REOS_CHECK = auto()
35
+ FAILED_RING_CHECK = auto()
36
+ FAILED_COUNTERION_CHECK = auto()
37
+
38
+ # Really this is a bug, but we don't want to blow up training if a
39
+ # few bad examples slip through.
40
+ INVALID_GROUND_TRUTH = auto()
41
+
42
+ # Failover reason if we have an exception during a reward function.
43
+ # NOTE: not using "failed" or "error" since an unhandled exception
44
+ # may be something else
45
+ REWARD_FUNCTION_EXCEPTION = auto()
46
+
47
+ # These are automatically added if no other reason is given
48
+ WRONG_ANSWER = auto()
49
+ RIGHT_ANSWER = auto()
50
+
51
+ def set_reason(self, metadata: dict | None) -> None:
52
+ if metadata is not None:
53
+ metadata[REWARD_REASON_KEY] = self.value
54
+
55
+ @classmethod
56
+ def set_default_reason(cls, reward: float, metadata: dict | None) -> None:
57
+ if metadata is not None and REWARD_REASON_KEY not in metadata:
58
+ (cls.RIGHT_ANSWER if reward >= 1.0 else cls.WRONG_ANSWER).set_reason(
59
+ metadata
60
+ )
61
+
62
+
63
+ SOLUTION_DELIMITER = "!:!"
64
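+ # Serialized RewardFunctionInfo strings have the form (see the validator below):
+ # "<fxn_name>!:!<answer_info>!:!<problem_type>"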
+
65
+
66
+ class RewardFunctionInfo(BaseModel):
67
+ """Metadata used by a reward function to evaluate a solution."""
68
+
69
+ fxn_name: str = Field(description="Name of the reward function to use.")
70
+ answer_info: str = Field(
71
+ description="Serialized metadata used by the reward function."
72
+ )
73
+ problem_type: str = Field(description="Problem type, for reference.")
74
+
75
+ @model_validator(mode="before")
76
+ @classmethod
77
+ def check_card_number_not_present(cls, data: Any) -> Any:
78
+ if isinstance(data, str):
79
+ # Deserialize from a string 3-tuple
80
+ fn, ainfo, pt = data.split(SOLUTION_DELIMITER, maxsplit=2)
81
+ return {"fxn_name": fn, "answer_info": ainfo, "problem_type": pt}
82
+ return data
83
+
84
+
85
+ class QAExample(BaseModel):
86
+ """Question-answer example with reward function info."""
87
+
88
+ id: str = Field(description="Unique identifier for this example.")
89
+ problem: str = Field(description="Problem to solve.")
90
+ problem_type: str = Field(description="Problem type, for reference or filtering.")
91
+ solution: RewardFunctionInfo = Field(
92
+ description="Metadata for the reward function."
93
+ )
94
+ ideal: str | None = Field(
95
+ description=(
96
+ "An optional ideal answer. This could be a candidate SMILES, a log10 of"
97
+ " water solubility, or None if having an ideal does not make sense."
98
+ )
99
+ )
100
+ unformatted: str | None = Field(
101
+ description=(
102
+ "Optional raw data used to generate the problem, used for traceability."
103
+ )
104
+ )
105
+
106
+
107
+ def filter_problem_types(
108
+ dataset: TDataset, problem_types: str | Collection[str] | None
109
+ ) -> TDataset:
110
+ """Filter a dataset by problem types.
111
+
112
+ Args:
113
+ dataset: The dataset to filter. Can be a single Dataset or a DatasetDict.
114
+ problem_types: A string or collection of strings specifying the problem
115
+ types to filter by.
116
+ - If None, the original dataset is returned.
117
+ - If a string or a collection of strings:
118
+ - Strings starting with "re:" are treated as regex patterns.
119
+ If a regex filter is provided, then it must be the only filter.
120
+ - Strings starting with "!" are treated as problem types to exclude.
121
+ - Other strings are treated as exact problem types to include.
122
+ - Mixing inclusion and exclusion rules (e.g. ["type_a", "!type_b"])
123
+ is not allowed.
124
+
125
+ Returns:
126
+ The filtered dataset.
127
+ """
128
+ if problem_types is None:
129
+ return dataset
130
+ if isinstance(problem_types, str): # Assume single problem type as a string
131
+ problem_types = [problem_types]
132
+ problem_types = {pt.strip() for pt in problem_types}
133
+
134
+ columns = (
135
+ next(iter(dataset.values())) if isinstance(dataset, DatasetDict) else dataset
136
+ ).column_names
137
+ # ether0-benchmark uses 'problem_type'; some variants may use 'type'
138
+ type_col = "problem_type" if "problem_type" in columns else "type"
139
+
140
+ if any(pt.startswith("re:") for pt in problem_types):
141
+ # A regex was passed in
142
+ if len(problem_types) != 1:
143
+ raise ValueError(
144
+ "If filtering by regex, only one filter is supported,"
145
+ f" passed {problem_types}."
146
+ )
147
+ regex = re.compile(next(iter(problem_types)).removeprefix("re:"))
148
+
149
+ def filter_func(x):
150
+ return regex.match(x[type_col]) is not None
151
+
152
+ else:
153
+ # Treat as exact string match
154
+ valid_problem_types = {pt for pt in problem_types if not pt.startswith("!")}
155
+ invalid_problem_types = {
156
+ pt.removeprefix("!") for pt in problem_types if pt.startswith("!")
157
+ }
158
+ if valid_problem_types:
159
+ if invalid_problem_types:
160
+ raise ValueError(
161
+ "Cannot specify both problem types to keep and to exclude,"
162
+ f" passed {problem_types}."
163
+ )
164
+
165
+ def filter_func(x):
166
+ return x[type_col] in valid_problem_types
167
+
168
+ else:
169
+
170
+ def filter_func(x):
171
+ return x[type_col] not in invalid_problem_types
172
+
173
+ return dataset.filter(filter_func, desc="Filtering problem types")
src/ether0/problem_prompts.py ADDED
@@ -0,0 +1,196 @@
1
+ """Prompt templates used for problems in the ether0 dataset."""
2
+
3
+ # ruff: noqa: E501, W505
4
+
5
+ NAME_IUPAC_PROMPTS = [
6
+ "What are the SMILES from the IUPAC name: {iupac}?",
7
+ "Could you please tell me the SMILES representation for {iupac}?",
8
+ "I have {iupac}. What would its SMILES be?",
9
+ "Provide the SMILES string for the molecule named {iupac}.",
10
+ "Convert this IUPAC name into a SMILES format: {iupac}.",
11
+ "Give me the SMILES notation for the compound {iupac}.",
12
+ "What SMILES corresponds to the IUPAC chemical name {iupac}?",
13
+ "How can I represent {iupac} as a SMILES string?",
14
+ "Generate the SMILES structure for this compound: {iupac}.",
15
+ "If the molecule is called {iupac}, what's the SMILES representation?",
16
+ ]
17
+
18
+ NAME_SMILES_PROMPTS = [
19
+ "What is the IUPAC name of this molecule: {smiles}?",
20
+ "Could you please tell me the IUPAC name for the compound represented by the SMILES string: {smiles}?",
21
+ "I have a molecule here with the SMILES notation {smiles}. What would its IUPAC name be?",
22
+ "I'm working with a chemical compound, and its SMILES representation is {smiles}. Can you help me determine its IUPAC name?",
23
+ "What is the correct IUPAC nomenclature for a molecule with the SMILES code {smiles}?",
24
+ "I'm trying to identify a compound. Its SMILES string is {smiles}. What's its IUPAC name?",
25
+ "If I provide you with the SMILES string of a molecule, which is {smiles}, could you generate its IUPAC name for me?",
26
+ "I've encountered a molecule in my research, denoted by the SMILES {smiles}. I'd appreciate it if you could tell me its IUPAC name.",
27
+ "Can you derive the IUPAC name from this SMILES representation: {smiles}?",
28
+ "For a compound with the structural representation given by the SMILES {smiles}, what is the corresponding IUPAC name?",
29
+ ]
30
+
31
+
32
+ REACTION_PROMPTS = [
33
+ "What is the product of this reaction? {rxn_smiles}",
34
+ "If {rxn_smiles} are allowed to react, what would the resulting molecule be in SMILES format?",
35
+ "Can you predict the outcome of this chemical reaction? {rxn_smiles}",
36
+ "I have a reaction scheme here: {rxn_smiles}. What will be generated as the product?",
37
+ "Assuming the reaction {rxn_smiles} goes to completion, what is the SMILES representation of the product?",
38
+ "In this reaction: {rxn_smiles}, what compound is formed?",
39
+ "Given the reactants and conditions in {rxn_smiles}, what is the expected product?",
40
+ "After performing the reaction {rxn_smiles}, what would I obtain as the main product?",
41
+ "If I mix these reactants under the specified conditions, {rxn_smiles}, what is the SMILES of the product?",
42
+ "Please provide the SMILES string of the product for this reaction: {rxn_smiles}",
43
+ ]
44
+
45
+ NAME_REACTION_PROMPTS = [
46
+ "What is the name of this reaction?\n{rxn_smiles}",
47
+ "I have a reaction here with {rxn_smiles}. Can you tell me its commonly used name?",
48
+ "Given the reaction represented by {rxn_smiles}, what is the standard name associated with it?",
49
+ "If I were to describe this reaction, {rxn_smiles}, in a textbook, what reaction name would I use?",
50
+ "I'm trying to identify this reaction: {rxn_smiles}. What is its well-known name?",
51
+ "The reaction {rxn_smiles} is taking place. What's the name of this type of transformation?",
52
+ "I came across this reaction pathway: {rxn_smiles}. Do you know the name it generally goes by?",
53
+ "In the context of organic chemistry, what is the established name for the reaction shown here: {rxn_smiles}?",
54
+ "I'm writing a lab report and need to name this reaction, {rxn_smiles}. What should I call it?",
55
+ "Can you identify the name of the reaction that follows this scheme: {rxn_smiles}?",
56
+ ]
57
+
58
+ COMPLETE_MOL_PROMPTS = [
59
+ "I have a partial molecule represented by the SMILES string {smiles}. What is a valid completion of this molecule, providing only the remaining characters in SMILES format?",
60
+ "Given the incomplete SMILES fragment {smiles}, can you suggest a realistic ending to complete the molecule? Please provide only the additional SMILES characters needed.",
61
+ "I'm working with a molecule that's partially described as {smiles}. What sequence of SMILES characters would you add to make it a complete, valid molecule?",
62
+ "The beginning of a molecule's SMILES representation is {smiles}. How would you finish this SMILES string to represent a viable chemical compound? Only provide the continuation of the SMILES.",
63
+ "Imagine you need to complete the SMILES string {smiles}. What's a plausible way to extend it to form a complete molecule, expressed as the remaining SMILES characters?",
64
+ "If I give you the partial SMILES {smiles}, what's a reasonable way to finish it off to create a valid molecule? Respond with just the additional SMILES characters.",
65
+ "I'm trying to construct a molecule, and I have the start of its SMILES: {smiles}. Could you provide a completion for it, ensuring the final molecule is realistic? Only give me the rest of the SMILES string.",
66
+ "Here's a fragment of a SMILES string: {smiles}. What would be a chemically sound way to complete it? Respond with the missing portion of the SMILES representation.",
67
+ "Suppose you have the incomplete molecular structure {smiles} in SMILES. How would you complete it to represent a real molecule, adding only the necessary SMILES characters?",
68
+ "I have an unfinished molecule represented by the SMILES fragment {smiles}. Can you help me complete it by suggesting the remaining SMILES characters needed to make it a valid chemical structure?",
69
+ ]
70
+
71
+ MOL_FORMULA_PROMPTS = [
72
+ "A compound with formula {formula} was isolated from {source}. What is a plausible SMILES for it given this organism?",
73
+ "{source} makes a compound with this formula: {formula}. What SMILES structure might correspond to it?",
74
+ "In {source}, I found a substance with formula {formula}. What biosynthetically plausible SMILES might this represent?",
75
+ "Analysis of {source} revealed a compound ({formula}). What SMILES structure aligns with this organism's metabolism?",
76
+ "The organism {source} contains a compound with formula {formula}. What's a likely SMILES based on its biochemistry?",
77
+ "A {formula} compound was extracted from {source}. Based on this organism, what's a probable SMILES structure?",
78
+ "What SMILES could have the formula {formula} and be isolated from {source}?",
79
+ "What would be a biologically relevant SMILES for a {formula} compound isolated from the organism {source}?",
80
+ "The organism {source} produced a compound with formula {formula}, what SMILES structure makes biosynthetic sense?",
81
+ "A {formula} metabolite from {source} was identified. What's a biologically plausible compound for this (as SMILES)?",
82
+ ]
83
+
84
+ FUNCTIONAL_GROUP_PROMPTS = [
85
+ "Propose a compound with molecular formula {formula} that contains the following functional groups: {functional_group}.",
86
+ "Suggest a SMILES structure for a molecule with formula {formula} and the following functional groups: {functional_group}.",
87
+ "Given that a compound has formula {formula}, propose SMILES for one that also contains these groups: {functional_group}",
88
+ "Provide a reasonable SMILES for a chemical with molecular formula {formula} and these groups: {functional_group}.",
89
+ "Generate a SMILES representation for a molecule containing groups: {functional_group}. It should also have formula {formula}.",
90
+ "Identify a plausible SMILES for a chemical compound with formula {formula} containing these groups: {functional_group}.",
91
+ ]
92
+
93
+ PROPERTY_TRIPLET_PROMPTS = [
94
+ "I have a molecule {smiles1} with a {property} of {value1}. Which of these similar molecules will most likely {change} this property?\n{options}",
95
+ "Given a molecule ({smiles1}) having a {property} of {value1}, select the modified molecule below that would {change} this property significantly:\n{options}",
96
+ "Molecule {smiles1} currently exhibits {property} of {value1}. Which modifications from the list below would effectively {change} it?\n{options}",
97
+ "If molecule {smiles1} has a {property} value of {value1}, which of the following options would best {change} this property?\n{options}",
98
+ "Considering {smiles1} has a measured {property} of {value1}, which candidate modification listed would most effectively {change} this property?\n{options}",
99
+ "Molecule {smiles1} demonstrates a {property} of {value1}. Which similar molecule below is best suited to {change} this characteristic?\n{options}",
100
+ "Given molecule {smiles1} with {property} at {value1}, identify which molecule among the following options would {change} it most effectively:\n{options}",
101
+ "Starting from molecule {smiles1}, which shows a {property} of {value1}, choose the structural change below that would notably {change} this property:\n{options}",
102
+ "The molecule {smiles1} has a {property} of {value1}. Which molecule listed would optimally {change} this value?\n{options}",
103
+ "Given a {property} of {value1} for molecule {smiles1}, pick the best molecule from below to {change} this property:\n{options}",
104
+ ]
105
+
106
+ # I have a molecule {smiles1} which is not a blood-brain barrier penetrating. Which of these similar molecules will most likely have this property?\n{options}",
107
+ PROPERTY_TRIPLET_PROMPTS_CAT = [
108
+ "I have a molecule {smiles1} which {rel} {property}. Which of these similar molecules will most likely {irel} this property?\n{options}",
109
+ "Given molecule {smiles1} that {rel} {property}, which molecule below is likely to {irel} this property?\n{options}",
110
+ "Molecule {smiles1} currently {rel} {property}. Choose from these similar molecules the one most likely to {irel} this property:\n{options}",
111
+ "Considering {smiles1} {rel} {property}, identify which of the following candidates will most likely {irel} the characteristic:\n{options}",
112
+ "Given that molecule {smiles1} {rel} {property}, select from below the molecule most expected to {irel} this characteristic:\n{options}",
113
+ "Starting from molecule {smiles1} which {rel} {property}, determine which listed molecule is most likely to {irel} this property:\n{options}",
114
+ "If molecule {smiles1} {rel} {property}, which of these related structures will most probably {irel} that property?\n{options}",
115
+ "Given molecule {smiles1} {rel} {property}, select the similar molecule listed below most likely to {irel} this property:\n{options}",
116
+ ]
117
+
118
+ # Which of the following options likely is a blood-brain barrier penetrating molecule?
119
+ # Which of the following options likely is not a blood-brain barrier penetrating molecule?
120
+ # Which of the following molecules is likely to not be blood-brain barrier penetrating?
121
+ PROPERTY_PROMPTS_CAT = [
122
+ "Which of the following options likely is{rel} a {property} molecule?\n{options}",
123
+ "Which of the following molecules is likely to{rel} be {property}?\n{options}",
124
+ "Identify the molecule below that likely is{rel} a {property} molecule:\n{options}",
125
+ "From the list below, select the molecule most likely to{rel} be {property}:\n{options}",
126
+ "Choose the molecule from the options below that most probably is{rel} {property}:\n{options}",
127
+ "Among the following, which molecule likely is{rel} considered {property}?\n{options}",
128
+ "Select the molecule below most expected to{rel} have {property} properties:\n{options}",
129
+ "From these molecules, identify the one most likely to{rel} possess {property}:\n{options}",
130
+ "Which candidate below most probably is{rel} classified as a {property} molecule?\n{options}",
131
+ ]
132
+
133
+
134
+ PROPERTY_PROMPTS = [
135
+ "Which of the following molecules likely has a {property} of {value}?\n{options}",
136
+ "Identify the molecule below expected to have a {property} around {value}:\n{options}",
137
+ "From these options, select the molecule most likely exhibiting {property} of {value}:\n{options}",
138
+ "Determine which of the following molecules likely shows a {property} near {value}:\n{options}",
139
+ "Choose the molecule that would most plausibly have a {property} of {value} from the list below:\n{options}",
140
+ "Among the following, which molecule is predicted to have a {property} close to {value}?\n{options}",
141
+ "Given the choices below, pick the molecule most likely to possess a {property} of {value}:\n{options}",
142
+ "Select the molecule from these candidates that probably has a {property} of {value}:\n{options}",
143
+ "Which molecule listed here is most likely to have a {property} approximately equal to {value}?\n{options}",
144
+ "Identify which of the following molecules will most likely have a {property} of {value}:\n{options}",
145
+ ]
146
+
147
+ RETRO_PROMPTS = [
148
+ "Propose a 1-step synthesis for the molecule {smiles} using likely purchasable reactants.",
149
+ "Given the molecule {smiles}, suggest a 1-step synthesis using commercially available starting materials.",
150
+ "What is a plausible 1-step reaction for the molecule {smiles} using common reactants?",
151
+ "Suggest a commercially feasible one-step route to synthesize {smiles}.",
152
+ "Outline a practical single-step synthetic method to prepare the molecule {smiles}.",
153
+ "Design a straightforward 1-step reaction scheme for synthesizing {smiles} using commercially available reagents.",
154
+ "Identify a likely accessible precursor and reaction for a single-step synthesis of {smiles}.",
155
+ "Provide a realistic single-step synthetic pathway to obtain {smiles} from common chemicals.",
156
+ "Propose a viable one-step synthetic route toward the molecule {smiles} starting from purchasable precursors.",
157
+ "Suggest one plausible reaction step to generate {smiles} using standard, commercially sourced reactants.",
158
+ ]
159
+
160
+ ORACLE_SOLUBILITY_PROMPTS = {
161
+ "tanimoto": [
162
+ "Propose a small change to {smiles} to {direction} its solubility by about 1 logS.",
163
+ "Suggest a minimal structural modification to {smiles} that would {direction} its solubility by approximately 1 logS unit.",
164
+ "What minor alteration could be made to {smiles} to {direction} its solubility by roughly 1 logS?",
165
+ "Design a small molecular change to {smiles} that would {direction} its solubility by about 1 logS while maintaining overall similarity.",
166
+ "Identify a small structural adjustment to {smiles} that would {direction} its aqueous solubility by approximately 1 logS unit.",
167
+ ],
168
+ "scaffold": [
169
+ "Change {smiles} to {direction} its solubility by about 1 logS, but keep its scaffold",
170
+ "Modify {smiles} to {direction} its solubility by approximately 1 logS while preserving the core scaffold structure.",
171
+ "Suggest alterations to {smiles} that would {direction} its solubility by about 1 logS unit without changing the molecular scaffold.",
172
+ "How could {smiles} be transformed to {direction} its solubility by roughly 1 logS while maintaining its scaffold?",
173
+ "Design a derivative of {smiles} with {direction}d solubility (by about 1 logS) that retains the same molecular scaffold.",
174
+ ],
175
+ "groups": [
176
+ "Adjust {smiles} to {direction} its solubility by about 1 logS, but keep the following groups intact: {pretty_groups}",
177
+ "Modify {smiles} to achieve a {direction} in solubility of approximately 1 logS while preserving these functional groups: {pretty_groups}",
178
+ "How would you alter {smiles} to {direction} its solubility by about 1 logS unit without changing these key groups: {pretty_groups}?",
179
+ "Suggest structural changes to {smiles} that would {direction} its solubility by roughly 1 logS while maintaining these groups: {pretty_groups}",
180
+ "Design a variant of {smiles} with {direction}d solubility (by about 1 logS) that retains all of these intact functional groups: {pretty_groups}",
181
+ ],
182
+ }
183
+
184
+ SMILES_FROM_FORMULA_PROMPTS = [
185
+ "Propose a molecule that has the following formula: {formula}.",
186
+ "Generate a SMILES representation for a compound with the formula {formula}.",
187
+ "What is a plausible SMILES for a compound with the formula {formula}?",
188
+ "Given the formula {formula}, can you suggest a possible SMILES structure?",
189
+ "Create a SMILES representation for a molecule that corresponds to the formula {formula}.",
190
+ "Identify a potential SMILES for a compound with the molecular formula {formula}.",
191
+ "What SMILES structure could correspond to the formula {formula}?",
192
+ "Generate a plausible SMILES for a compound with the formula {formula}.",
193
+ "Given the formula {formula}, what would be a reasonable SMILES representation?",
194
+ "Propose a SMILES structure for a molecule with the formula {formula}.",
195
+ "Generate a SMILES representation for a compound with the formula {formula}.",
196
+ ]
src/ether0/py.typed ADDED
File without changes
src/ether0/rewards.py ADDED
@@ -0,0 +1,753 @@
1
+ import ast
2
+ import logging
3
+ import operator
4
+ import re
5
+ import unicodedata
6
+ from collections.abc import Iterable, Mapping, Sequence
7
+ from typing import Protocol, cast
8
+
9
+ import exmol
10
+ from pydantic import JsonValue
11
+ from rdkit import Chem, DataStructs
12
+ from rdkit.Chem import GetMolFrags, SanitizeMol # pylint: disable=no-name-in-module
13
+ from rdkit.Chem.rdMolDescriptors import ( # pylint: disable=no-name-in-module
14
+ CalcMolFormula,
15
+ GetMorganFingerprintAsBitVect,
16
+ )
17
+ from rdkit.Chem.rdmolfiles import MolToSmiles # pylint: disable=no-name-in-module
18
+ from rdkit.rdBase import BlockLogs
19
+
20
+ from ether0.clients import fetch_forward_rxn, fetch_purchasable, fetch_solubility
21
+ from ether0.data import is_reasonable_fp, is_reasonable_ring_system, mol_from_smiles
22
+ from ether0.model_prompts import extract_answer_loose, extract_thought_answer_strict
23
+ from ether0.models import RewardFunctionInfo, RewardReason
24
+
25
+ block = BlockLogs()
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ class RewardEvalFn(Protocol):
31
+ def __call__(
32
+ self,
33
+ yhat: str,
34
+ y: str,
35
+ soft: bool = False,
36
+ test: bool = False,
37
+ metadata: dict[str, JsonValue] | None = None,
38
+ ) -> float: ...
39
+
40
+
41
+ def formula_diff(formula1: str, formula2: str) -> float:
42
+ """Calculate l2 norm between two molecular formulas."""
43
+ # important = elements we care about in organic chem
44
+ important_elements = {"C", "H", "O", "N", "F", "Cl", "Br", "P", "S"}
45
+ pattern = re.compile(r"([A-Z][a-z]?)(\d*)")
46
+ counts1 = dict.fromkeys(important_elements, 0)
47
+ counts2 = dict.fromkeys(important_elements, 0)
48
+ for m in pattern.finditer(formula1):
49
+ element = m.group(1)
50
+ count = int(m.group(2)) if m.group(2) else 1
51
+ if element in important_elements:
52
+ counts1[element] += count
53
+ for m in pattern.finditer(formula2):
54
+ element = m.group(1)
55
+ count = int(m.group(2)) if m.group(2) else 1
56
+ if element in important_elements:
57
+ counts2[element] += count
58
+ d2 = sum((counts1[k] - counts2[k]) ** 2 for k in important_elements)
59
+ return d2**0.5
60
+
61
+
62
+ def format_reward(
63
+ completions,
64
+ reasoning: bool,
65
+ reward: float = 1.0,
66
+ **kwargs, # noqa: ARG001
67
+ ) -> list[float]:
68
+ """Reward function that checks if the completion has a specific format."""
69
+ if isinstance(completions[0], list):
70
+ completion_contents = [completion[0]["content"] for completion in completions]
71
+ else:
72
+ completion_contents = completions
73
+ # Note we check `answer is not None` since empty answer still counts as valid
74
+ # formatting.
75
+ return [
76
+ reward if answer is not None else 0.0
77
+ for answer in (
78
+ extract_thought_answer_strict(c, reasoning)[1] for c in completion_contents
79
+ )
80
+ ]
81
+
82
+
83
+ SUPERSCRIPT_PATTERN = re.compile(r"\^{([\d,]+)}")
84
+ ITALICS_PATTERN = re.compile(r"{([a-zA-Z])}")
85
+ # parentheses that aren't nested and don't contain hyphens
86
+ # https://regex101.com/r/6c8smX/1
87
+ USELESS_PARENTHESES = re.compile(r"([-\d])[\(\[{]([A-Za-z0-9]+)[\]\)}]-")
88
+
89
+
90
+ def normalize_iupac(s: str) -> str:
91
+ """Normalize an IUPAC name by removing special formatting and characters.
92
+
93
+ Args:
94
+ s: Original IUPAC name.
95
+
96
+ Returns:
97
+ A normalized IUPAC name without special characters.
98
+ """
99
+ s = s.strip().casefold()
100
+ # replace ^{n} with ^(n)
101
+ s = SUPERSCRIPT_PATTERN.sub(r"^(\1)", s)
102
+ # remove italicized pattern - but don't match ^{1,5} (by avoiding matching commas)
103
+ s = ITALICS_PATTERN.sub(r"\1", s)
104
+ # remove garbage
105
+ s = s.replace("$", "").replace("~", "") # noqa: FURB184
106
+ # remove parentheses that aren't nested and don't contain hyphens
107
+ s = USELESS_PARENTHESES.sub(r"\1\2-", s)
108
+ # ok to ignore carets and hyphens for comparison
109
+ return s.replace("^", "").replace(" ", "-") # noqa: FURB184
110
+
111
+
112
+ def normalize_unicodes(s: str) -> str:
113
+ """Normalize all Unicode dashes/hyphens to regular hyphen.
114
+
115
+ Args:
116
+ s: Input string with potential Unicode characters.
117
+
118
+ Returns:
119
+ Unicode-normalized string.
120
+ """
121
+ s = unicodedata.normalize("NFKC", s)
122
+ s = "".join("-" if unicodedata.category(c) in {"Pd", "Po"} else c for c in s)
123
+ return s.replace("-", "") # minus sign # noqa: FURB184
124
+
125
+
126
+ def is_reasonable_molecule(
127
+ mol: Chem.Mol,
128
+ metadata: dict[str, JsonValue] | None,
129
+ test: bool, # noqa: ARG001
130
+ ref_mol: Chem.Mol | None = None,
131
+ ) -> bool:
132
+ """Returns True if the molecule passes heuristics for being a reasonable molecule."""
133
+ # always check valence
134
+ try:
135
+ SanitizeMol(mol)
136
+ except Exception:
137
+ RewardReason.INVALID_MOL.set_reason(metadata)
138
+ return False
139
+
140
+ # We have decided that the convention will be to check the
141
+ # same at test time and train time.
142
+
143
+ # determine if we have counter-ions (which is fine), but we want to
144
+ # evaluate the largest molecule only. We only consider single molecules
145
+ # or single molecules + a counterion as valid responses
146
+ sorted_frags = sorted( # sort by size
147
+ GetMolFrags(mol, asMols=True), key=lambda m: m.GetNumAtoms(), reverse=True
148
+ )
149
+ if len(sorted_frags) > 2: # noqa: PLR2004
150
+ # not a counter-ion
151
+ RewardReason.FAILED_COUNTERION_CHECK.set_reason(metadata)
152
+ return False
153
+ if len(sorted_frags) == 2: # noqa: PLR2004
154
+ # If 2, assume the smaller fragment is the counter-ion, and double check it's small
155
+ cmol = sorted_frags[1]
156
+ if cmol.GetNumHeavyAtoms() > 5: # noqa: PLR2004
157
+ RewardReason.FAILED_COUNTERION_CHECK.set_reason(metadata)
158
+ return False
159
+
160
+ mol = sorted_frags[0]
161
+
162
+ ring_status = is_reasonable_ring_system(mol, ref_mol)
163
+
164
+ if not ring_status:
165
+ RewardReason.FAILED_RING_CHECK.set_reason(metadata)
166
+ return False
167
+
168
+ fp_ok = is_reasonable_fp(mol, ref_mol)
169
+ if not fp_ok:
170
+ RewardReason.FAILED_REOS_CHECK.set_reason(metadata)
171
+ return False
172
+ return True
173
+
174
+
175
+ FULL_SMILES_KEY = "full_smiles"
176
+
177
+
178
+ def set_full_smiles(smiles: str, metadata: dict[str, JsonValue] | None) -> None:
179
+ if metadata is not None:
180
+ metadata[FULL_SMILES_KEY] = smiles
181
+
182
+
183
+ BAD_SMARTS_PATTERNS = [
184
+ "[#16]-[#16]-[#16]", # More than a thiol bond
185
+ "[#8]~[#8]", # Peroxides
186
+ "[#7]-[NH2]", # Hydrazines
187
+ "[#7]-[NH3]", # weird charged amine
188
+ "[#7]~[#7]~[#7]", # 3 or more amines
189
+ "[NX2](=[OX1])[O;$([X2]),$([X1-])]", # Nitrite
190
+ "[SX2][NX2]=[OX1]", # Thionitrite
191
+ "[$([NX3](=[OX1])(=[OX1])[O;$([X2]),$([X1-])]),$([NX3+]([OX1-])(=[OX1])[O;$([X2]),$([X1-])])]", # Nitrate # noqa: E501
192
+ "[$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]", # Nitro
193
+ "[NX2](=[OX1])[!#7;!#8]", # Nitroso
194
+ "[CX4]" + ("-[CX4]" * 6), # Long chain of carbons (7 or more)
195
+ ]
196
+
197
+
198
+ def contains_bad_substruct(mol: Chem.Mol) -> bool:
199
+ return any(
200
+ mol.HasSubstructMatch(Chem.MolFromSmarts(pat)) for pat in BAD_SMARTS_PATTERNS
201
+ )
202
+
203
+
204
+ def rxn_eval(
205
+ yhat: str,
206
+ y: str,
207
+ soft: bool = False, # noqa: ARG001
208
+ test: bool = False, # noqa: ARG001
209
+ metadata: dict[str, JsonValue] | None = None, # noqa: ARG001
210
+ ) -> float:
211
+ """Returns 1.0 if strings match (case-insensitive), otherwise 0.0."""
212
+ # some normalization for IUPAC names - shouldn't affect others
213
+
214
+ if normalize_iupac(yhat) == normalize_iupac(y):
215
+ return 1.0
216
+
217
+ # If that fails (would return 0), try normalizing further
218
+ return (
219
+ 1.0
220
+ if normalize_unicodes(normalize_iupac(yhat))
221
+ == normalize_unicodes(normalize_iupac(y))
222
+ else 0.0
223
+ )
224
+
225
+
226
+ def str_eval(
227
+ yhat: str,
228
+ y: str,
229
+ soft: bool = False, # noqa: ARG001
230
+ test: bool = False, # noqa: ARG001
231
+ metadata: dict[str, JsonValue] | None = None,
232
+ ) -> float:
233
+ """Returns 1.0 if strings match (case-insensitive), otherwise 0.0."""
234
+ set_full_smiles(yhat, metadata)
235
+ return 1.0 if normalize_iupac(yhat) == normalize_iupac(y) else 0.0
236
+
237
+
238
+ def valid_mol_eval(
239
+ yhat: str,
240
+ y: str,
241
+ soft: bool = False, # noqa: ARG001
242
+ test: bool = False,
243
+ metadata: dict[str, JsonValue] | None = None,
244
+ ) -> float:
245
+ """Validate if yhat is a valid SMILES string, when appended to y.
246
+
247
+ Args:
248
+ yhat: Model-predicted SMILES string or partial completion.
249
+ y: Base SMILES string (e.g. "O=C1CCC2=CC=C(O)C(OC)=C2C#CCC2=CC3=C4") to append
250
+ yhat and check validity.
251
+ test: unused
252
+ soft: unused
253
+ metadata: optional metadata dictionary
254
+
255
+ Returns:
256
+ 1.0 if `y + yhat` is a valid SMILES string, 0.0 otherwise.
257
+ """
258
+ if not yhat:
259
+ RewardReason.INVALID_MOL.set_reason(metadata)
260
+ return 0.0
261
+
262
+ # First attempt yhat alone (assuming full SMILES), then try y+yhat (assuming
263
+ # partial) if that fails
264
+ for smiles in (yhat, y + yhat):
265
+ if not smiles.startswith(y):
266
+ # only accept a completion that starts with the given partial SMILES
267
+ continue
268
+ try:
269
+ mol = mol_from_smiles(smiles)
270
+ except Exception:
271
+ logger.exception(
272
+ f"Failed to construct molecule from SMILES string {yhat!r}."
273
+ )
274
+ continue
275
+ if mol is not None:
276
+ set_full_smiles(smiles, metadata)
277
+ if not is_reasonable_molecule(mol, metadata, test):
278
+ return 0.0
279
+ return 1.0
280
+
281
+ # Nothing worked - mark as invalid
282
+ RewardReason.INVALID_MOL.set_reason(metadata)
283
+ return 0.0
284
+
285
+
286
+ SMOOTH_THRESHOLD_TANIMOTO_SIMILARITY = 0.7 # close enough
287
+
288
+
289
+ def tanimoto_similarity(
290
+ m1: Chem.Mol | None, m2: Chem.Mol | None, atom_threshold: float = 10.0
291
+ ) -> float:
292
+ """Calculate Tanimoto similarity between two molecules.
293
+
294
+ The `atom_threshold` parameter is a relative fraction (e.g., `0.2` for 20%)
295
+ that sets a threshold for degenerate cases when the fingerprints are similar,
296
+ but there are many more atoms in one molecule.
297
+
298
+ Default is 10.0, which corresponds to a 1000% difference and has no practical effect.
299
+ """
300
+ if m1 is None or m2 is None:
301
+ return 0.0
302
+ fp1 = GetMorganFingerprintAsBitVect(m1, 2)
303
+ fp2 = GetMorganFingerprintAsBitVect(m2, 2)
304
+
305
+ # heavy atom threshold
306
+ atoms1 = m1.GetNumHeavyAtoms()
307
+ atoms2 = m2.GetNumHeavyAtoms()
308
+ if (denom := max(atoms1, atoms2)) > 0:
309
+ # Do not apply the atom diff check if there are no heavy atoms.
310
+ # This is always safe, since the only way to avoid
311
+ # this block is if m1=m2=H2, which would pass anyway.
312
+ atom_diff = abs(atoms1 - atoms2) / denom
313
+ if atom_diff > atom_threshold:
314
+ return 0.0
315
+ return DataStructs.TanimotoSimilarity(fp1, fp2)
316
+
317
+
318
+ def exact_mol_match(m1: Chem.Mol, m2: Chem.Mol) -> float:
319
+ s1 = MolToSmiles(m1, canonical=True, isomericSmiles=True) # noqa: FURB120
320
+ s2 = MolToSmiles(m2, canonical=True, isomericSmiles=True) # noqa: FURB120
321
+ return 1.0 if s1 == s2 else 0.0
322
+
323
+
324
+ def get_largest_mol(smiles: str) -> Chem.Mol | None:
325
+ parts = smiles.split(".")
326
+ # Filter out small fragments (removes counter-ions) and invalid SMILES
327
+ mols = [
328
+ mol_from_smiles(p)
329
+ for p in parts
330
+ if (len(p) > 3 and mol_from_smiles(p) is not None) # noqa: PLR2004
331
+ ]
332
+ if not mols:
333
+ return None
334
+ mols_atoms = []
335
+ for mol in mols:
336
+ n_atoms = None if mol is None else mol.GetNumAtoms()
337
+ if n_atoms is None:
338
+ raise NotImplementedError(f"Didn't handle {mol=} having None atoms.")
339
+ mols_atoms.append((mol, n_atoms))
340
+ return max(mols_atoms, key=operator.itemgetter(1))[0]
341
+
342
+
343
+ def product_eval(
344
+ yhat: str,
345
+ y: str,
346
+ soft: bool = False,
347
+ test: bool = False, # noqa: ARG001
348
+ metadata: dict[str, JsonValue] | None = None,
349
+ ) -> float:
350
+ """Computes the Tanimoto similarity of the largest fragments from two SMILES (if soft) or exact match (if not soft).
351
+
352
+ Returns:
353
+ Reward in [0, 1].
354
+ """ # noqa: E501,W505
355
+ m1 = get_largest_mol(yhat)
356
+ m2 = get_largest_mol(y)
357
+
358
+ if m1 is None:
359
+ RewardReason.INVALID_MOL.set_reason(metadata)
360
+ return 0.0
361
+ if m2 is None:
362
+ RewardReason.INVALID_GROUND_TRUTH.set_reason(metadata)
363
+ logger.warning(f"Invalid ground truth molecule {y!r}.")
364
+ return 0.0
365
+
366
+ # don't use yhat directly since it may have multiple molecules
367
+ set_full_smiles(MolToSmiles(m1), metadata)
368
+
369
+ if soft:
370
+ return tanimoto_similarity(m1, m2)
371
+
372
+ return exact_mol_match(m1, m2) # exact match for non-soft mode
373
+
374
+
375
+ def caption_eval(
376
+ yhat: str,
377
+ y: str,
378
+ soft: bool = False,
379
+ test: bool = False,
380
+ metadata: dict[str, JsonValue] | None = None,
381
+ ) -> float:
382
+ """Currently forwards to product_eval, but also stores Tanimoto in metadata."""
383
+ if metadata is not None:
384
+ m1 = get_largest_mol(yhat)
385
+ m2 = get_largest_mol(y)
386
+ metadata["tanimoto"] = (
387
+ tanimoto_similarity(m1, m2) if (m1 is not None and m2 is not None) else 0.0
388
+ )
389
+ return product_eval(yhat, y, soft, test, metadata)
390
+
391
+
392
+ def formula_eval(
393
+ yhat: str,
394
+ y: str,
395
+ soft: bool = False,
396
+ test: bool = False,
397
+ metadata: dict[str, JsonValue] | None = None,
398
+ ) -> float:
399
+ """Check correct formula and Tanimoto similarity, giving a reward in [0, 1]."""
400
+ set_full_smiles(yhat, metadata)
401
+ mhat = mol_from_smiles(yhat)
402
+ m = mol_from_smiles(y)
403
+ if mhat is None:
404
+ RewardReason.INVALID_MOL.set_reason(metadata)
405
+ return 0.0
406
+ if m is None:
407
+ RewardReason.INVALID_GROUND_TRUTH.set_reason(metadata)
408
+ logger.warning(f"Invalid ground truth molecule {y!r}.")
409
+ return 0.0
410
+
411
+ fhat = CalcMolFormula(mhat)
412
+ f = CalcMolFormula(m)
413
+ if fhat != f:
414
+ RewardReason.WRONG_FORMULA.set_reason(metadata)
415
+ return 0.0
416
+
417
+ if not is_reasonable_molecule(mhat, metadata, test, ref_mol=m):
418
+ return 0.0
419
+
420
+ return (
421
+ 1.0
422
+ if tanimoto_similarity(mhat, m) >= SMOOTH_THRESHOLD_TANIMOTO_SIMILARITY
423
+ # Give partial credit if soft=True and we got the right formula
424
+ else (0.5 if soft else 0.0)
425
+ )
426
+
427
+
428
+ def functional_group_eval(
429
+ yhat: str,
430
+ y: str,
431
+ soft: bool = False,
432
+ test: bool = False,
433
+ metadata: dict[str, JsonValue] | None = None,
434
+ ) -> float:
435
+ """Match functional group and formula, giving a reward in [0, 1]."""
436
+ set_full_smiles(yhat, metadata)
437
+ mhat = mol_from_smiles(yhat)
438
+ if mhat is None:
439
+ RewardReason.INVALID_MOL.set_reason(metadata)
440
+ return 0.0
441
+
442
+ y_args: tuple[str, list[str]] = ast.literal_eval(y)
443
+
444
+ formula = y_args[0]
445
+ groups = {g.lower() for g in y_args[1]}
446
+
447
+ fhat = CalcMolFormula(mhat)
448
+ if fhat != formula:
449
+ RewardReason.WRONG_FORMULA.set_reason(metadata)
450
+ return 0.0
451
+
452
+ groupshat: set[str] = {
453
+ f.lower() for f in exmol.get_functional_groups(mhat, return_all=True)
454
+ }
455
+
456
+ if not is_reasonable_molecule(mhat, metadata, test):
457
+ return 0.0
458
+
459
+ return (
460
+ 1.0
461
+ if groups <= groupshat
462
+ # Give partial credit if soft=True and we got the right formula
463
+ else (0.5 if soft else 0.0)
464
+ )
465
+
466
+
467
+ def oracle_solubility_eval(
468
+ yhat: str,
469
+ y: str,
470
+ soft: bool = False, # noqa: ARG001
471
+ test: bool = False,
472
+ metadata: dict[str, JsonValue] | None = None,
473
+ ) -> float:
474
+ """Evaluate solubility prediction using remote, giving a reward in [0, 1]."""
475
+ set_full_smiles(yhat, metadata)
476
+ # we only want single molecules
477
+ if "." in yhat:
478
+ return 0.0
479
+ mhat = mol_from_smiles(yhat)
480
+ if mhat is None:
481
+ RewardReason.INVALID_MOL.set_reason(metadata)
482
+ return 0.0
483
+
484
+ y_args: tuple[str, str | list[str], float | str, str] = ast.literal_eval(y)
485
+ constraint_type, constraint_data = y_args[:2]
486
+ target = float(y_args[2])
487
+ # Unused: direction = y_args[3] # noqa: ERA001
488
+
489
+ ref_mol: Chem.Mol | None = None
490
+
491
+ # first check constraint
492
+ if constraint_type == "scaffold":
493
+ ref_mol = mol_from_smiles(cast(str, constraint_data))
494
+ if ref_mol is None:
495
+ raise NotImplementedError(
496
+ f"Didn't handle when {constraint_data=} is invalid."
497
+ )
498
+ if not mhat.HasSubstructMatch(ref_mol):
499
+ RewardReason.FAILED_CONSTRAINT.set_reason(metadata)
500
+ return 0.0
501
+ elif constraint_type == "groups":
502
+ groups = [g.lower() for g in exmol.get_functional_groups(mhat, return_all=True)]
503
+ if not any(group.lower() in groups for group in constraint_data):
504
+ RewardReason.FAILED_CONSTRAINT.set_reason(metadata)
505
+ return 0.0
506
+ elif constraint_type == "tanimoto":
507
+ ref_mol = mol_from_smiles(cast(str, constraint_data))
508
+ if (
509
+ tanimoto_similarity(mhat, ref_mol, atom_threshold=0.2)
510
+ < SMOOTH_THRESHOLD_TANIMOTO_SIMILARITY
511
+ ):
512
+ RewardReason.FAILED_CONSTRAINT.set_reason(metadata)
513
+ return 0.0
514
+ else:
515
+ raise ValueError(f"Unknown constraint type: {constraint_type}")
516
+
517
+ if not is_reasonable_molecule(mhat, metadata, test, ref_mol=ref_mol):
518
+ return 0.0
519
+
520
+ # make sure we hit the target
521
+ result = fetch_solubility(yhat)
522
+ if "solubility" in result:
523
+ solubility = result["solubility"]
524
+ delta = solubility - target
525
+ # hard coded to typical solubility accuracies
526
+ # we subtract 0.01 because some questions ask for
527
+ # 0.5 change and we don't want restatements to
528
+ # be matches
529
+ if abs(delta) > (0.5 - 0.01):
530
+ RewardReason.WRONG_NUMERICAL_ANSWER.set_reason(metadata)
531
+ return 0.0
532
+ return 1.0
533
+ RewardReason.INVALID_MOL.set_reason(metadata)
534
+ return 0.0
535
+
536
+
537
+ def oracle_rxn_eval(
538
+ yhat: str,
539
+ y: str,
540
+ soft: bool = False,
541
+ test: bool = False, # noqa: ARG001
542
+ metadata: dict[str, JsonValue] | None = None,
543
+ ) -> float:
544
+ """Evaluate forward reaction prediction using remote, giving a reward in [0, 1]."""
545
+ if ">" not in yhat or "." not in yhat:
546
+ RewardReason.INVALID_RXN.set_reason(metadata)
547
+ return 0.0
548
+
549
+ # make sure there are not more than two angle brackets
550
+ if yhat.count(">") > 2: # noqa: PLR2004
551
+ RewardReason.INVALID_RXN.set_reason(metadata)
552
+ return 0.0
553
+
554
+ # ok now do real check on regex after heuristic checks
555
+ # adapted partly from https://gist.github.com/lsauer/1312860/264ae813c2bd2c27a769d261c8c6b38da34e22fb
556
+ # https://regex101.com/r/9bdE6H/1
557
+ # basically SMILES_THINGS>SMILES_THINGS | empty>
558
+ if not re.match(
559
+ r"^[^J][a-z0-9@+\-\[\]\(\)\\\/%=#$\.]{6,}>[a-z0-9@+\-\[\]\(\)\\\/%=#$\.]{0,}>",
560
+ yhat,
561
+ re.IGNORECASE, # lower = aromatic, which we're fine matching
562
+ ):
563
+ RewardReason.INVALID_RXN.set_reason(metadata)
564
+ return 0.0
565
+
566
+ ymol = mol_from_smiles(y)
567
+ if ymol is None:
568
+ RewardReason.INVALID_GROUND_TRUTH.set_reason(metadata)
569
+ logger.warning(f"Invalid ground truth molecule {y!r}.")
570
+ return 0.0
571
+
572
+ reactant_smi = yhat.split(">")[0].split(".")
573
+ reactants = [mol_from_smiles(r) for r in reactant_smi]
574
+ if not all(x is not None for x in reactants):
575
+ RewardReason.INVALID_MOL.set_reason(metadata)
576
+ return 0.0
577
+
578
+ reagents = [mol_from_smiles(r) for r in yhat.split(">")[1].split(".") if r.strip()]
579
+ if not all(x is not None for x in reagents):
580
+ RewardReason.INVALID_MOL.set_reason(metadata)
581
+ return 0.0
582
+
583
+ # check products, if present, contain the desired product
584
+ products = [mol_from_smiles(r) for r in yhat.split(">")[2].split(".") if r.strip()]
585
+ # notice we pass if there are no products
586
+ if products:
587
+ if not all(x is not None for x in products):
588
+ RewardReason.INVALID_MOL.set_reason(metadata)
589
+ return 0.0
590
+ if not any(exact_mol_match(m, ymol) == 1.0 for m in products): # type: ignore[arg-type]
591
+ RewardReason.INVALID_RXN.set_reason(metadata)
592
+ return 0.0
593
+
594
+ # Disallow products in the reactants or reagents
595
+ if any(exact_mol_match(m, ymol) == 1.0 for m in (reactants + reagents)): # type: ignore[arg-type]
596
+ RewardReason.PRODUCT_IS_REACTANT.set_reason(metadata)
597
+ return 0.0
598
+
599
+ # check that the reactants are purchasable
600
+
601
+ def is_small_so_probably_purchasable(smi: str) -> bool:
602
+ mol = mol_from_smiles(smi)
603
+ # Molecules with <= 4 heavy atoms are likely purchasable,
604
+ # since they include solvents and counterions
605
+ return mol is not None and mol.GetNumHeavyAtoms() <= 4 # noqa: PLR2004
606
+
607
+ purchasable_results = fetch_purchasable(reactant_smi)
608
+ if not all(
609
+ purchasable_results.get(r, False) or is_small_so_probably_purchasable(r)
610
+ for r in reactant_smi
611
+ ):
612
+ RewardReason.NOT_PURCHASABLE.set_reason(metadata)
613
+ return 0.0
614
+
615
+ result = fetch_forward_rxn(yhat)
616
+ if "product" in result:
617
+ product = result["product"]
618
+ pmol = mol_from_smiles(product)
619
+ if pmol is None:
620
+ RewardReason.INVALID_MOL.set_reason(metadata)
621
+ return 0.0
622
+ if soft:
623
+ return tanimoto_similarity(pmol, ymol)
624
+ if exact_mol_match(pmol, ymol) == 1.0:
625
+ return 1.0
626
+ RewardReason.WRONG_PRODUCT.set_reason(metadata)
627
+ return 0.0
628
+ RewardReason.INVALID_RXN.set_reason(metadata)
629
+ return 0.0
630
+
631
+
632
+ def valid_molecule_eval(
633
+ yhat: str,
634
+ y: str, # noqa: ARG001
635
+ soft: bool = False, # noqa: ARG001
636
+ test: bool = False, # noqa: ARG001
637
+ metadata: dict[str, JsonValue] | None = None, # noqa: ARG001
638
+ ) -> float:
639
+ """Evaluate if yhat is valid molecule."""
640
+ if not yhat:
641
+ return 0.0
642
+ mol = mol_from_smiles(yhat, sanitize=True)
643
+ return float(mol is not None)
644
+
645
+
646
+ EVAL_FUNCTIONS: Mapping[str, RewardEvalFn] = {
647
+ "str_eval": str_eval,
648
+ "valid_mol_eval": valid_mol_eval,
649
+ "caption_eval": caption_eval,
650
+ "product_eval": product_eval,
651
+ "rxn_eval": rxn_eval,
652
+ "formula_eval": formula_eval,
653
+ "functional_group_eval": functional_group_eval,
654
+ "sol_eval": oracle_solubility_eval,
655
+ "rxn_forward": oracle_rxn_eval,
656
+ "should_not_answer_eval": str_eval,
657
+ "should_answer_eval": valid_molecule_eval,
658
+ }
659
+
660
+
661
+ # These correspond to open-ended problems that do not have a
662
+ # unique molecule as answer.
663
+ APPLY_GOOD_MOLECULE_CHECK: set[str] = {
664
+ "valid_mol_eval",
665
+ "formula_eval",
666
+ "functional_group_eval",
667
+ "sol_eval",
668
+ }
669
+
670
+
671
+ def accuracy_reward(
672
+ completions: Sequence[list[Mapping[str, str]]] | Sequence[str],
673
+ solution: Iterable[str],
674
+ reasoning: bool,
675
+ metadata: list[dict[str, JsonValue]] | None = None,
676
+ soft: bool = False,
677
+ test: bool = False,
678
+ good_molecule_bonus: float = 0.0,
679
+ **kwargs, # noqa: ARG001
680
+ ) -> list[float]:
681
+ """Reward function that checks if the completion is the same as the ground truth."""
682
+ if isinstance(completions[0], list):
683
+ messages = cast(Sequence[list[Mapping[str, str]]], completions)
684
+ contents: Sequence[str] = [m[0]["content"] for m in messages]
685
+ else:
686
+ contents = cast(Sequence[str], completions)
687
+ if soft and test:
688
+ raise ValueError("Soft mode is not supported for test time accuracy reward.")
689
+ rewards = []
690
+ problem_types: list[str | None] = []
691
+
692
+ if metadata is None:
693
+ # Create empty metadata that we can use internally in this function
694
+ metadata = [{} for _ in contents]
695
+ else:
696
+ if metadata:
697
+ raise NotImplementedError(f"Received non-empty metadata {metadata}.")
698
+ metadata.extend([{} for _ in contents])
699
+
700
+ for content, info, meta in zip(contents, solution, metadata, strict=True):
701
+ reward = 0.0
702
+ reward_info = RewardFunctionInfo.model_validate(info)
703
+ fxn_name, answer_info, problem_type = tuple(reward_info.model_dump().values())
704
+ try:
705
+ answer: str | None = (
706
+ extract_answer_loose(content)
707
+ if test
708
+ else extract_thought_answer_strict(content, reasoning=reasoning)[1]
709
+ )
710
+ if answer is not None:
711
+ # During test time, see if full SMILES string was given as input
712
+ if problem_type == "valid_mol_eval" and test:
713
+ # If we're testing, we only allow full SMILES strings
714
+ reward = EVAL_FUNCTIONS[fxn_name](
715
+ answer, answer_info, test=test, metadata=meta
716
+ )
717
+ else:
718
+ reward = EVAL_FUNCTIONS[fxn_name](
719
+ answer, answer_info, soft=soft, metadata=meta
720
+ )
721
+ RewardReason.set_default_reason(reward, meta)
722
+
723
+ if reward == 1.0 and fxn_name in APPLY_GOOD_MOLECULE_CHECK:
724
+ if FULL_SMILES_KEY not in meta:
725
+ raise ValueError( # noqa: TRY301
726
+ f"Missing full SMILES key in metadata {meta}"
727
+ f" with reward function {fxn_name}."
728
+ )
729
+ full_smiles = cast(str, meta[FULL_SMILES_KEY])
730
+ mol = mol_from_smiles(full_smiles)
731
+ if mol is None:
732
+ raise ValueError( # noqa: TRY301
733
+ f"Invalid full SMILES {full_smiles}"
734
+ f" with reward function {fxn_name}."
735
+ )
736
+ meta["is_good_molecule"] = not contains_bad_substruct(mol)
737
+ if meta["is_good_molecule"]:
738
+ reward += good_molecule_bonus
739
+
740
+ else:
741
+ RewardReason.FORMAT_FAILED.set_reason(meta)
742
+ rewards.append(reward)
743
+ problem_types.append(problem_type)
744
+ except Exception:
745
+ logger.exception(
746
+ f"Unhandled exception in {fxn_name=} for {problem_type=}"
747
+ f" with inputs {content=}, {answer_info=} {soft=}, and {test=}."
748
+ )
749
+ RewardReason.REWARD_FUNCTION_EXCEPTION.set_reason(meta)
750
+ rewards.append(reward)
751
+ problem_types.append(None)
752
+
753
+ return rewards
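As a rough usage sketch (not part of the upload), the evaluators above can be called directly or looked up by name through EVAL_FUNCTIONS; the example SMILES below are assumptions, and the oracle-backed evaluators (solubility, forward reaction) additionally need the remote services wired up in ether0.clients.

```python
# Illustrative sketch only: exercises the locally computable evaluators.
from ether0.data import mol_from_smiles
from ether0.rewards import EVAL_FUNCTIONS, formula_eval, tanimoto_similarity

# Same molecule written two ways: formulas match, so this is expected to score 1.0,
# subject to the packaged "reasonable molecule" heuristics.
metadata: dict = {}
print(formula_eval(yhat="CCCCO", y="OCCCC", metadata=metadata), metadata)

# The registry maps problem-type names to the same callables used by accuracy_reward.
print(EVAL_FUNCTIONS["product_eval"]("Oc1ccccc1", "c1ccccc1O", soft=True))

# Tanimoto similarity between two RDKit molecules (Morgan fingerprints, radius 2).
print(tanimoto_similarity(mol_from_smiles("CCCCO"), mol_from_smiles("CCCCN")))
```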
src/ether0/rings.bloom ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1b81b5dc5e59d2068742aafb0c217067e951260f56527998e902d37ac177a25
3
+ size 12500018
src/ether0/utils.py ADDED
@@ -0,0 +1,108 @@
1
+ import logging
2
+ import re
3
+ from http import HTTPStatus
4
+ from typing import TypeVar
5
+
6
+ import regex
7
+ from datasets import Dataset, DatasetDict, Version, load_dataset
8
+ from datasets.exceptions import DatasetNotFoundError
9
+ from huggingface_hub.errors import HfHubHTTPError
10
+ from tenacity import (
11
+ before_sleep_log,
12
+ retry,
13
+ retry_if_exception,
14
+ stop_after_attempt,
15
+ wait_fixed,
16
+ )
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # pylint: disable-next=invalid-name
21
+ TDataset = TypeVar("TDataset", bound=Dataset | DatasetDict)
22
+
23
+
24
+ @retry(
25
+ retry=retry_if_exception(
26
+ lambda x: (
27
+ (
28
+ # On 2/11/2025 James kept seeing on the g3 server cluster:
29
+ # > huggingface_hub.errors.HfHubHTTPError: 504 Server Error: Gateway Time-out for
30
+ # > url: https://huggingface.co/api/datasets/org/repo/paths-info/abc123
31
+ # And on 3/14 James saw this on the g3 server cluster:
32
+ # > huggingface_hub.errors.HfHubHTTPError: 502 Server Error: Bad Gateway for
33
+ # > url: https://huggingface.co/api/datasets/org/repo/paths-info/abc123
34
+ isinstance(x, HfHubHTTPError)
35
+ and x.response.status_code
36
+ in {HTTPStatus.BAD_GATEWAY.value, HTTPStatus.GATEWAY_TIMEOUT.value}
37
+ )
38
+ # On 4/14/2025 James kept seeing on the g5 server cluster:
39
+ # > datasets.exceptions.DatasetNotFoundError:
40
+ # > Dataset 'org/repo' doesn't exist on the Hub or cannot be accessed.
41
+ or isinstance(x, DatasetNotFoundError)
42
+ )
43
+ ),
44
+ before_sleep=before_sleep_log(logger, logging.WARNING),
45
+ stop=stop_after_attempt(5),
46
+ wait=wait_fixed(5),
47
+ )
48
+ def load_dataset_retrying(
49
+ path: str,
50
+ revision: str | Version | None = None,
51
+ ) -> DatasetDict:
52
+ return load_dataset(path, revision=revision)
53
+
54
+
55
+ # SEE: https://www.compart.com/en/unicode/block/U+2070 for subscript letters
56
+ invalid_chars_regex = re.compile(
57
+ r"[^A-Za-z0-9Α-Ωα-ωₐₑₒₓₔₕₖₗₘₙₚₛₜ⁰¹²³⁴⁵⁶⁷⁸⁹₀₁₂₃₄₅₆₇₈₉×\s!\"#$%&±⁻'´ʻ‘’ʼ“”()*+⁺,\-—–‐‑‒―−⏤./:;«<≤=≡≈≆≥>›»⇌?@[\\\]^_`{|}~←⇐→➔➞➛➡➟➧➭⇨⇒⇛⟺⇔⟶…]" # noqa: RUF001
58
+ )
59
+ invalid_languages_regex = regex.compile(
60
+ r"[\p{"
61
+ + r"}\p{".join({
62
+ # SEE: https://jrgraphix.net/r/Unicode/
63
+ "Arabic",
64
+ "Armenian",
65
+ "Bengali",
66
+ "Braille_Patterns",
67
+ "Cyrillic",
68
+ "Devanagari",
69
+ "Ethiopic",
70
+ "Georgian",
71
+ "Gujarati",
72
+ "Gurmukhi",
73
+ "Han",
74
+ "Hangul",
75
+ "Hebrew",
76
+ "Hiragana",
77
+ "Kannada",
78
+ "Katakana",
79
+ "Khmer",
80
+ "Latin_Extended_A",
81
+ "Latin_Extended_Additional",
82
+ "Latin_Extended_B",
83
+ "Malayalam",
84
+ "Myanmar",
85
+ "Syriac",
86
+ "Tamil",
87
+ "Telugu",
88
+ "Thaana",
89
+ "Thai",
90
+ "Tifinagh",
91
+ })
92
+ + r"}]"
93
+ )
94
+
95
+
96
+ def contains_invalid(
97
+ text: str, chars: bool = False, languages: bool = False, threshold: int = 1
98
+ ) -> tuple[bool, list[str]]:
99
+ """Check if the text contains invalid characters or languages."""
100
+ if chars:
101
+ matches = invalid_chars_regex.findall(text)
102
+ if len(matches) >= threshold:
103
+ return True, sorted(matches)
104
+ if languages:
105
+ matches = invalid_languages_regex.findall(text)
106
+ if len(matches) >= threshold:
107
+ return True, sorted(matches)
108
+ return False, []
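A small usage sketch for these helpers (illustrative, with hypothetical inputs); load_dataset_retrying simply wraps datasets.load_dataset with retries for transient Hub errors.

```python
# Illustrative sketch only.
from ether0.utils import contains_invalid, load_dataset_retrying

# Flag completions that contain characters from unexpected scripts (here, Cyrillic).
has_invalid, offending = contains_invalid("Этанол is CCO", languages=True)
print(has_invalid, offending)

# Retry-wrapped dataset load; requires network access to the Hugging Face Hub.
# dataset = load_dataset_retrying("futurehouse/ether0-benchmark")
```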
tests/conftest.py ADDED
@@ -0,0 +1,12 @@
1
+ import pathlib
2
+
3
+ import pytest
4
+ from datasets import Dataset, load_dataset
5
+
6
+ TESTS_DIR = pathlib.Path(__file__).parent
7
+ REPO_ROOT_DIR = TESTS_DIR.parent
8
+
9
+
10
+ @pytest.fixture(name="ether0_benchmark_test", scope="session")
11
+ def fixture_ether0_benchmark_test() -> Dataset:
12
+ return load_dataset("futurehouse/ether0-benchmark", split="test")
tests/test_chat.py ADDED
@@ -0,0 +1,61 @@
1
+ import pytest
2
+
3
+ from ether0.chat import ChatArguments
4
+ from ether0.model_prompts import ProblemPrompt, SysPrompt
5
+
6
+
7
+ class TestChatArguments:
8
+ @pytest.mark.parametrize(
9
+ ("args", "row", "expected"),
10
+ [
11
+ (
12
+ ChatArguments(problem_prompt=ProblemPrompt.NONE),
13
+ {"problem": "stub problem"},
14
+ {"prompt": [{"content": "stub problem", "role": "user"}]},
15
+ ),
16
+ (
17
+ ChatArguments(problem_prompt=ProblemPrompt.NONE),
18
+ {"problem": ["stub problem", "stub problem 2"]},
19
+ {
20
+ "prompt": [
21
+ [{"content": "stub problem", "role": "user"}],
22
+ [{"content": "stub problem 2", "role": "user"}],
23
+ ]
24
+ },
25
+ ),
26
+ (
27
+ ChatArguments(
28
+ sys_prompt=SysPrompt.SCIENTIFIC_AI,
29
+ problem_prompt=ProblemPrompt.THINK_ANSWER,
30
+ ),
31
+ {"problem": "stub problem"},
32
+ {
33
+ "prompt": [
34
+ {
35
+ "role": "system",
36
+ "content": "You are a scientific reasoning AI assistant.",
37
+ },
38
+ {
39
+ "role": "user",
40
+ "content": (
41
+ "A conversation between User and Assistant. The user"
42
+ " asks a question, and the Assistant solves it. The"
43
+ " assistant first thinks about the reasoning process in"
44
+ " the mind and then provides the user with the answer."
45
+ " The reasoning process and answer are enclosed within"
46
+ " <|think_start|> <|think_end|>"
47
+ " and <|answer_start|> <|answer_end|> tags,"
48
+ " respectively, i.e., <|think_start|> reasoning process here"
49
+ " <|think_end|><|answer_start|> answer here <|answer_end|>"
50
+ "\n\nstub problem"
51
+ ),
52
+ },
53
+ ]
54
+ },
55
+ ),
56
+ ],
57
+ )
58
+ def test_rl_conversation(
59
+ self, args: ChatArguments, row: dict, expected: dict
60
+ ) -> None:
61
+ assert args.make_rl_conversation(row) == expected
tests/test_data.py ADDED
@@ -0,0 +1,158 @@
1
+ from collections.abc import Collection
2
+
3
+ import pytest
4
+ from datasets import Dataset
5
+ from pydantic import JsonValue
6
+
7
+ from ether0.data import (
8
+ SMILES_PATTERN,
9
+ get_problem_categories_from_datasets,
10
+ get_problem_category,
11
+ )
12
+ from ether0.models import RewardFunctionInfo
13
+ from ether0.rewards import EVAL_FUNCTIONS
14
+
15
+
16
+ def test_get_problem_categories_from_datasets(ether0_benchmark_test: Dataset) -> None:
17
+ assert get_problem_categories_from_datasets(ether0_benchmark_test) == {
18
+ "functional-group",
19
+ "molecule-completion",
20
+ "molecule-formula",
21
+ "molecule-name",
22
+ "oracle-solubility",
23
+ "property-cat-eve",
24
+ "property-cat-safety",
25
+ "property-cat-smell",
26
+ "property-regression-adme",
27
+ "property-regression-ld50",
28
+ "property-regression-pka",
29
+ "reaction-prediction",
30
+ "retro-synthesis",
31
+ "simple-formula",
32
+ }
33
+
34
+
35
+ UNVERIFIABLE_PROBLEM_CATEGORY_PREFIXES_TO_EXCLUDE: Collection[str] = {
36
+ "oracle-solubility", # 'ideal' is not actually an answer
37
+ "retro-synthesis", # 'ideal' is not actually an answer
38
+ }
39
+
40
+
41
+ def test_evals(ether0_benchmark_test: Dataset) -> None:
42
+ failures = []
43
+ for row in ether0_benchmark_test:
44
+ reward_info = RewardFunctionInfo.model_validate(row["solution"])
45
+ fxn_name, answer_info, problem_type = tuple(reward_info.model_dump().values())
46
+ problem_category = get_problem_category(problem_type)
47
+ if (
48
+ problem_category in UNVERIFIABLE_PROBLEM_CATEGORY_PREFIXES_TO_EXCLUDE
49
+ or problem_category
50
+ == "molecule-completion" # Molc had no 'ideal's when this was made
51
+ ):
52
+ continue
53
+ metadata: dict[str, JsonValue] = {}
54
+ try:
55
+ if problem_category.startswith("property"):
56
+ yhat = answer_info
57
+ else:
58
+ assert row["ideal"]
59
+ yhat = row["ideal"]
60
+ assert (
61
+ EVAL_FUNCTIONS[fxn_name](yhat=yhat, y=answer_info, metadata=metadata)
62
+ == 1.0
63
+ )
64
+ except AssertionError:
65
+ failures.append((problem_category, row["id"], metadata))
66
+ assert not failures
67
+
68
+
69
+ TEST_REASONING_TEXT = (
70
+ "Let's analyze the given molecules and try to predict their LD50 values. LD50"
71
+ " refers to the lethal dose at which 50% of the test organisms die. A lower LD50"
72
+ " means higher toxicity, and a higher LD50 indicates lower toxicity. We need to"
73
+ " identify structural features that relate to toxicity.\n\nThe question leaves open"
74
+ " the possibility that none of the compounds have an LD50 of 320 mg/kg. Let's"
75
+ " consider each molecule individually:\n\n1."
76
+ " ClC1=C(C=CC(=C1)Cl)C1(OCC(O1)COC1=CC=C(C=C1)N1CCN(CC1)C(C)=O)CN1C=NC=C1: This"
77
+ " molecule appears to be quite complex. It has a dichloro-substituted aromatic"
78
+ " ring, an ether linkage, a morpholine ring, a piperazine ring, and an imidazole"
79
+ " ring. The presence of two chlorine atoms on the phenyl ring could suggest some"
80
+ " interaction with biological targets. The molecule also has a morpholine and"
81
+ " piperazine moiety which could contribute to binding with receptors or enzymes."
82
+ " The presence of an amide group might indicate some polarity, but the overall"
83
+ " structure looks relatively lipophilic (nonpolar) given the aromatic rings and"
84
+ " alkyl chains.\n\n2."
85
+ " ClC1=C(C=CC(=C1)Cl)[C@]1(OC[C@@H](O1)COC1=CC=C(C=C1)N1CCN(CC1)C1=CC=C(C=C1)N1C(N(N=C1)[C@H](CC)C)=O)CN1N=CN=C1:" # noqa: E501
86
+ " This is a very complex molecule, with multiple rings, stereocenters, and"
87
+ " heteroatoms. It's a distinct structure and appears to be larger than the first"
88
+ " molecule. We can see a furan ring, a pyrazole ring, an amide group, and other"
89
+ " major differences. This change in the rings and other functional groups is likely"
90
+ " to significantly change the molecular properties compared to the first"
91
+ " molecule.\n\n3."
92
+ " [2H]C(C(=O)N1CCN(CC1)C1=CC=C(C=C1)OCC1O[C@@](OC1)(CN1C=NC=C1)C1=C(C=C(C=C1)Cl)Cl)([2H])[2H]:" # noqa: E501
93
+ " This molecule, labeled with deuterium, has multiple rings including a piperazine,"
94
+ " furan, a substituted imidazole, and a dichlorinated phenyl ring. It also includes"
95
+ " an ester group which is sometimes associated with higher toxicity compared to"
96
+ " simple ethers.\n\nThinking about general principles of toxicity, lipophilicity"
97
+ " (fat solubility) is often related to higher toxicity. A molecule with a marked"
98
+ " lipophilic character can often accumulate in fatty tissues and interact with the"
99
+ " cell membrane, affect cellular transport or receptor activity. This could lead to"
100
+ " higher toxicity by interfering with normal cellular function. Similarly, the"
101
+ " presence of chlorine atoms can sometimes contribute to toxicity due to possible"
102
+ " metabolic activation to reactive intermediates. However, the position and nature"
103
+ " of other substituents and functional groups can influence how chlorine"
104
+ " substitutions modulate toxicity. For example, some chlorinated compounds are"
105
+ " relatively non-toxic.\n\nConsidering the size and complexity of the molecules, we"
106
+ " should think about their potential metabolic pathways. Large molecules can be"
107
+ " metabolized through various pathways, potentially leading to reactive"
108
+ " intermediates that interact with biological molecules. Metabolites of these"
109
+ " compounds might be more or less toxic than the initial molecules, and the"
110
+ " metabolic pathways themselves might be quite different. Perhaps one of the"
111
+ " metabolites could be the reason for an LD50 of 320 mg/kg. Alternatively, a"
112
+ " compound might be relatively non-toxic in itself, but its presence can alter"
113
+ " enzyme activity or other metabolic processes and indirectly lead to cell"
114
+ " damage.\n\nComparing the three molecules. Molecules 1 and 2 share some structural"
115
+ " features like the dichloro-substituted aromatic ring and the presence of a"
116
+ " morpholine ring system. However, they also have distinct differences in the"
117
+ " connectivity and presence of additional rings, including likely some more polar"
118
+ " and/or sterically bulky substituents. Molecule 3 has different ring systems and"
119
+ " the addition of both a deuterated methyl group and an ester group which adds"
120
+ " polar character and can often activate adjacent portions of the molecule by"
121
+ " metabolic oxygenation.\n\nLet's think about bioreactivity beyond simple chemical"
122
+ " interactions. Structures can influence how a molecule interacts with biological"
123
+ " receptors or enzymes. The size and shape of these molecules and the nature of the"
124
+ " functional groups can determine the extent of the molecule's binding interactions"
125
+ " with biomolecules. Some conformationally adaptable structures might bind strongly"
126
+ " to targets and interfere with crucial pathways, which can lead to toxicity."
127
+ " Therefore, weaknesses in essential molecular machinery could have similar"
128
+ " negative effects if bound by those biomolecules.\n\nIf one of these molecules has"
129
+ " an LD50 of 320 mg/kg, it suggests moderate toxicity. It could be that one of the"
130
+ " molecules doesn't have the necessary structural features to interact strongly"
131
+ " with critical biological targets for high toxicity, and/or it might be"
132
+ " metabolized to relatively non-toxic products, such as carbon dioxide and water."
133
+ " Thus, while the molecules share some features with other potentially bioactive"
134
+ " molecules, it could be that they themselves are not exceptionally potent."
135
+ )
136
+
137
+ NO_SMILES_TEXT = "This text does not contain any SMILES"
138
+
139
+
140
+ @pytest.mark.parametrize(
141
+ ("text", "expected_answer"),
142
+ [
143
+ (
144
+ TEST_REASONING_TEXT,
145
+ [
146
+ "ClC1=C(C=CC(=C1)Cl)C1(OCC(O1)COC1=CC=C(C=C1)N1CCN(CC1)C(C)=O)CN1C=NC=C1",
147
+ "ClC1=C(C=CC(=C1)Cl)[C@]1(OC[C@@H](O1)COC1=CC=C(C=C1)N1CCN(CC1)C1=CC=C(C=C1)N1C(N(N=C1)[C@H](CC)C)=O)CN1N=CN=C1",
148
+ "[2H]C(C(=O)N1CCN(CC1)C1=CC=C(C=C1)OCC1O[C@@](OC1)(CN1C=NC=C1)C1=C(C=C(C=C1)Cl)Cl)([2H])[2H]",
149
+ ],
150
+ ),
151
+ (
152
+ NO_SMILES_TEXT,
153
+ [],
154
+ ),
155
+ ],
156
+ )
157
+ def test_extract_smiles_from_text(text: str, expected_answer: list[str]) -> None:
158
+ assert sorted(SMILES_PATTERN.findall(text)) == sorted(expected_answer)
tests/test_model_prompts.py ADDED
@@ -0,0 +1,424 @@
1
+ import pytest
2
+
3
+ from ether0.model_prompts import (
4
+ ANSWER_END,
5
+ ANSWER_START,
6
+ THINK_END,
7
+ THINK_START,
8
+ ProblemPrompt,
9
+ extract_answer_loose,
10
+ extract_thought_answer_strict,
11
+ )
12
+
13
+
14
+ def test_problem_prompt() -> None:
15
+ none_prompt = ProblemPrompt.NONE.get_prompt()
16
+ assert isinstance(none_prompt, str)
17
+ assert "think" not in none_prompt
18
+ assert "answer" not in none_prompt
19
+
20
+ answer_prompt = ProblemPrompt.ANSWER.get_prompt()
21
+ assert isinstance(answer_prompt, str)
22
+ assert "think" not in answer_prompt
23
+ assert "answer" in answer_prompt
24
+
25
+ think_answer_prompt = ProblemPrompt.THINK_ANSWER.get_prompt()
26
+ assert isinstance(think_answer_prompt, str)
27
+ assert "think" in think_answer_prompt
28
+ assert "answer" in think_answer_prompt
29
+
30
+
31
+ @pytest.mark.parametrize(
32
+ ("content", "expected"),
33
+ [
34
+ pytest.param("<answer>CCO</answer>", "CCO", id="base"),
35
+ pytest.param("<answer></answer>", "", id="empty-answer"),
36
+ pytest.param("<answer> </answer>", "", id="space-answer"),
37
+ pytest.param("\n<answer>CCO</answer>", "CCO", id="base-extra-whitespace-1"),
38
+ pytest.param("\n<answer>CCO</answer>\n", "CCO", id="base-extra-whitespace-2"),
39
+ pytest.param(" <answer>CCO</answer> ", "CCO", id="base-extra-whitespace-3"),
40
+ pytest.param("word<answer>CCO</answer> ", "CCO", id="base-extra-whitespace-4"),
41
+ pytest.param("<answer>\nCCO\n</answer>", "CCO", id="base-w-newlines"),
42
+ pytest.param(
43
+ "<answer> \nCCO\n</answer>", "CCO", id="base-w-spaces-and-newlines"
44
+ ),
45
+ pytest.param(
46
+ "<answer>\n\nCCO\n\n</answer>", "CCO", id="base-w-double-newlines"
47
+ ),
48
+ pytest.param("<answer> CCO </answer>", "CCO", id="base-w-spaces"),
49
+ pytest.param(
50
+ "<answer> < CCO</answer>", "< CCO", id="base-contains-potential-xml-1"
51
+ ),
52
+ pytest.param(
53
+ "<answer> <stub CCO</answer>",
54
+ "<stub CCO",
55
+ id="base-contains-potential-xml-2",
56
+ ),
57
+ pytest.param("<answer><answer>CCO</answer></answer>", "", id="nested"),
58
+ pytest.param(
59
+ "<answer>\n<answer>\nCCO\n</answer>\n</answer>", "", id="nested-w-newlines"
60
+ ),
61
+ pytest.param(
62
+ "<think>thought</think><answer>CCO</answer>", "CCO", id="thought-base"
63
+ ),
64
+ pytest.param(
65
+ "<think>thought</think><answer>\nCCO\n</answer>",
66
+ "CCO",
67
+ id="thought-base-w-newlines",
68
+ ),
69
+ pytest.param(
70
+ "<answer>CCO</answer><think>thought</think>", "CCO", id="thought-reversed"
71
+ ),
72
+ pytest.param(
73
+ "<answer>\nCCO\n</answer><think>thought</think>",
74
+ "CCO",
75
+ id="thought-reversed-w-newlines",
76
+ ),
77
+ pytest.param(
78
+ "<answer>echoing prompt</answer><answer>CCO2</answer>",
79
+ "CCO2",
80
+ id="multi-answer",
81
+ ),
82
+ pytest.param(
83
+ "<answer>echoing prompt</answer><answer>\nCCO2\n</answer>",
84
+ "CCO2",
85
+ id="multi-answer-w-newlines",
86
+ ),
87
+ ],
88
+ )
89
+ def test_extract_answer_loose(content: str | None, expected: str) -> None:
90
+ assert extract_answer_loose(content) == expected
91
+
92
+
93
+ @pytest.mark.parametrize(
94
+ ("content", "expected_answer", "expected_thought"),
95
+ [
96
+ pytest.param(f"{ANSWER_START}CCO{ANSWER_END}", None, None, id="no-thought"),
97
+ pytest.param(
98
+ f"{ANSWER_START}{ANSWER_END}", None, None, id="no-thought-empty-answer"
99
+ ),
100
+ pytest.param(
101
+ f"{ANSWER_START} {ANSWER_END}", None, None, id="no-thought-space-answer"
102
+ ),
103
+ pytest.param(
104
+ f"{ANSWER_START}\nCCO\n{ANSWER_END}",
105
+ None,
106
+ None,
107
+ id="no-thought-w-newlines",
108
+ ),
109
+ pytest.param(
110
+ f"{ANSWER_START}{ANSWER_START}CCO{ANSWER_END}{ANSWER_END}",
111
+ None,
112
+ None,
113
+ id="no-thought-nested",
114
+ ),
115
+ pytest.param(
116
+ f"{ANSWER_START}\n{ANSWER_START}\nCCO\n{ANSWER_END}\n{ANSWER_END}",
117
+ None,
118
+ None,
119
+ id="no-thought-nested-w-newlines",
120
+ ),
121
+ pytest.param(
122
+ f"{THINK_START}thought{THINK_END}{ANSWER_START}CCO{ANSWER_END}",
123
+ "CCO",
124
+ "thought",
125
+ id="base",
126
+ ),
127
+ pytest.param(
128
+ f"{THINK_START}thought{THINK_END}{ANSWER_START}{ANSWER_END}",
129
+ None,
130
+ "thought",
131
+ id="empty-answer",
132
+ ),
133
+ pytest.param(
134
+ f"{THINK_START}thought{THINK_END}{ANSWER_START} {ANSWER_END}",
135
+ None,
136
+ "thought",
137
+ id="space-answer",
138
+ ),
139
+ pytest.param(
140
+ # Allow models to place up to one whitespace before the thought
141
+ f"\n{THINK_START}thought{THINK_END}{ANSWER_START}CCO{ANSWER_END}",
142
+ "CCO",
143
+ "thought",
144
+ id="base-extra-whitespace-1",
145
+ ),
146
+ pytest.param(
147
+ f"\n{THINK_START}thought{THINK_END}{ANSWER_START}CCO{ANSWER_END}\n",
148
+ None,
149
+ None,
150
+ id="base-extra-whitespace-2",
151
+ ),
152
+ pytest.param(
153
+ f" {THINK_START}thought{THINK_END}{ANSWER_START}CCO{ANSWER_END} ",
154
+ None,
155
+ None,
156
+ id="base-extra-whitespace-3",
157
+ ),
158
+ pytest.param(
159
+ f"word{THINK_START}thought{THINK_END}{ANSWER_START}CCO{ANSWER_END}\n",
160
+ None,
161
+ None,
162
+ id="base-extra-whitespace-4",
163
+ ),
164
+ pytest.param(
165
+ f"{THINK_START}thought{THINK_END}{ANSWER_START}\nCCO\n{ANSWER_END}",
166
+ "CCO",
167
+ "thought",
168
+ id="base-w-newlines",
169
+ ),
170
+ pytest.param(
171
+ f"{THINK_START}thought{THINK_END}{ANSWER_START} \nCCO\n{ANSWER_END}",
172
+ "CCO",
173
+ "thought",
174
+ id="base-w-spaces-and-newlines",
175
+ ),
176
+ pytest.param(
177
+ f"{THINK_START}thought{THINK_END}{ANSWER_START}\n\nCCO\n\n{ANSWER_END}",
178
+ "CCO",
179
+ "thought",
180
+ id="base-w-double-newlines",
181
+ ),
182
+ pytest.param(
183
+ f"{THINK_START}thought{THINK_END}{ANSWER_START} CCO {ANSWER_END}",
184
+ "CCO",
185
+ "thought",
186
+ id="base-w-spaces",
187
+ ),
188
+ pytest.param(
189
+ f"{THINK_START}\nthought\n{THINK_END}{ANSWER_START}\nCCO\n{ANSWER_END}",
190
+ "CCO",
191
+ "thought",
192
+ id="base-w-newlines-both",
193
+ ),
194
+ pytest.param(
195
+ f"{THINK_START}thought\nthought{THINK_END}{ANSWER_START}CCO{ANSWER_END}",
196
+ "CCO",
197
+ "thought\nthought",
198
+ id="base-inner-newline",
199
+ ),
200
+ pytest.param(
201
+ f"{THINK_START}\nthought\nthought\n{THINK_END}{ANSWER_START}CCO{ANSWER_END}",
202
+ "CCO",
203
+ "thought\nthought",
204
+ id="base-inner-newline-w-newlines",
205
+ ),
206
+ pytest.param(
207
+ f"{THINK_START}thought{THINK_END}inter{ANSWER_START}CCO{ANSWER_END}",
208
+ "CCO",
209
+ "thought",
210
+ id="base-inter",
211
+ ),
212
+ pytest.param(
213
+ f"{THINK_START}thought{THINK_END}inter\ninter{ANSWER_START}CCO{ANSWER_END}",
214
+ "CCO",
215
+ "thought",
216
+ id="base-inter-inner-newline",
217
+ ),
218
+ pytest.param(
219
+ f"{THINK_START}thought{THINK_END}\ninter\ninter\n{ANSWER_START}CCO{ANSWER_END}",
220
+ "CCO",
221
+ "thought",
222
+ id="base-inter-inner-newline-w-newlines",
223
+ ),
224
+ pytest.param(
225
+ f"{ANSWER_START}CCO{ANSWER_END}{THINK_START}thought{THINK_END}",
226
+ None,
227
+ None,
228
+ id="base-reversed",
229
+ ),
230
+ pytest.param(
231
+ f"{ANSWER_START}\nCCO\n{ANSWER_END}{THINK_START}thought{THINK_END}",
232
+ None,
233
+ None,
234
+ id="base-reversed-w-newlines",
235
+ ),
236
+ pytest.param(
237
+ f"{THINK_START}thought < thought{THINK_END}{ANSWER_START}CCO{ANSWER_END}",
238
+ "CCO",
239
+ "thought < thought",
240
+ id="thought-contains-potential-xml-1",
241
+ ),
242
+ pytest.param(
243
+ f"{THINK_START}thought <stub"
244
+ f" thought{THINK_END}{ANSWER_START}CCO{ANSWER_END}",
245
+ "CCO",
246
+ "thought <stub thought",
247
+ id="thought-contains-potential-xml-2",
248
+ ),
249
+ pytest.param(
250
+ f"{THINK_START}thought{THINK_END}{ANSWER_START}CCO{ANSWER_END}a",
251
+ None,
252
+ None,
253
+ id="text-after-answer",
254
+ ),
255
+ pytest.param(
256
+ f"{THINK_START}thought{THINK_END}{THINK_START}thought{THINK_END}{ANSWER_START}CCO{ANSWER_END}",
257
+ None,
258
+ None,
259
+ id="multi-thought",
260
+ ),
261
+ pytest.param(
262
+ f"{THINK_START}thought{THINK_END}<thought>thought</thought>{ANSWER_START}CCO{ANSWER_END}",
263
+ "CCO",
264
+ "thought",
265
+ id="interleaved-think-thought",
266
+ ),
267
+ pytest.param(
268
+ f"{THINK_START}thought{THINK_END}{ANSWER_START}CCO{ANSWER_END}Some"
269
+ f" text{THINK_START}thought{THINK_END}{ANSWER_START}CCO{ANSWER_END}",
270
+ None,
271
+ None,
272
+ id="continuation",
273
+ ),
274
+ pytest.param(
275
+ f"{ANSWER_START}echoing prompt{ANSWER_END}{ANSWER_START}CCO2{ANSWER_END}",
276
+ None,
277
+ None,
278
+ id="no-thought-multi-answer",
279
+ ),
280
+ pytest.param(
281
+ f"{ANSWER_START}echoing"
282
+ f" prompt{ANSWER_END}{ANSWER_START}\nCCO2\n{ANSWER_END}",
283
+ None,
284
+ None,
285
+ id="no-thought-multi-answer-w-newlines",
286
+ ),
287
+ pytest.param(
288
+ f"{THINK_START}\nAlright, so I need to figure out the IUPAC name for the"
289
+ " molecule with the formula C1=CC(=CC=C1O)O. Let me start by trying to"
290
+ " visualize the structure. The formula seems a bit complex, so breaking it"
291
+ " down might help.\n\nFirst, I notice there's a ring structure because of"
292
+ " the C1 notation, which suggests a cyclic compound. The presence of"
293
+ " double bonds (the = signs) indicates that it's not just a simple alkane."
294
+ " So, I'm thinking it's a cyclic diene or something similar.\n\nLooking"
295
+ " closer at the formula, I see two oxygen atoms attached to the ring. The"
296
+ " first O is attached to a carbon that's part of a double bond (C1=CC...),"
297
+ " and the second O is attached to another carbon that's also part of a"
298
+ " double bond. So, there are two ketone groups or possibly ester groups?"
299
+ " Wait, no, the formula is C1=CC(=CC=C1O)O, which might imply that each"
300
+ " carbon attached to the ring has an oxygen, but let me try to count the"
301
+ " bonds properly.\n\nWait, perhaps I should draw this out. Let me imagine"
302
+ " the ring. Carbon 1 (C1) is double-bonded to a carbon (C2). Then, C2 is"
303
+ " connected to another carbon (C3) via a double bond, which is then"
304
+ " connected to C4, and so on, until I come back to C1, forming a ring. But"
305
+ " since there are multiple double bonds, it's probably a conjugated diene"
306
+ " or something like that.\n\nWait, maybe it's a cyclopentadiene"
307
+ " derivative. But the presence of two oxygen atoms makes it more likely to"
308
+ " be a dienol ether or something similar. Alternatively, it could be a"
309
+ " cyclohexene derivative with two ketone groups, but I'm not sure.\n\nLet"
310
+ " me count the carbons. The formula is C1=CC(=CC=C1O)O. So, each 'C' is"
311
+ " part of the ring. Let me see: C1 is double-bonded to C2, then C2 is"
312
+ " connected via a single bond to another group, which is C3, which is"
313
+ " double-bonded to C4, and so on until I close the ring back to C1. Hmm,"
314
+ " maybe it's a cyclopentadiene ring with substituents.\n\nWait, perhaps"
315
+ " it's a cyclopentadienyl ether. Let me think about how the substituents"
316
+ " are arranged. If C1 is connected via a double bond to C2, then C2 has"
317
+ " another substituent, which is C3 via a double bond, and so on. Maybe the"
318
+ " structure is such that there are two adjacent double bonds, making it a"
319
+ " conjugated diene.\n\nBut I'm getting a bit confused. Maybe I should"
320
+ " approach this differently. Let's try to write out the structure step by"
321
+ " step.\n\nStarting with C1, which is double-bonded to C2: C1=C2. Then, C2"
322
+ " is connected to another carbon, which is part of a double bond to C3:"
323
+ " C2=C3. So, now we have C1=C2-C3=... Then, C3 is connected to C4, which"
324
+ " is connected back to C1, forming a ring. So, the ring would be a"
325
+ " five-membered ring with two double bonds.\n\nWait, but a five-membered"
326
+ " ring with two double bonds would be conjugated diene in a ring. So,"
327
+ " that's cyclopentadiene. Now, each of the carbons in the double bonds has"
328
+ " an oxygen substituent. So, C1 has an O attached, and C3 also has an O"
329
+ " attached. So, it's cyclopentadiene-1,3-dione? But wait, the formula is"
330
+ " C1=CC(=CC=C1O)O, which suggests that each double bond carbon has an O"
331
+ " attached. So, both C1 and C3 have O substituents.\n\nBut"
332
+ " cyclopentadiene-1,3-dione would have two ketone groups, which would make"
333
+ " it a diketone. Alternatively, it could be a dienol ether, but with two"
334
+ " oxygen atoms attached to the ring.\n\nWait, another possibility is that"
335
+ " it's a cyclopentadienyl ether with two substituents. Alternatively,"
336
+ " perhaps it's a substituted cyclopentadienone.\n\nWait, maybe I should"
337
+ " count the number of atoms. The formula is C1=CC(=CC=C1O)O. Let's parse"
338
+ " this:\n\n- C1 is connected via double bond to C2.\n- C2 is connected to"
339
+ " another carbon (let's say C3) which is double-bonded to C4.\n- C4 is"
340
+ " connected back to C1, forming a ring.\n- Each of C1 and C3 has an oxygen"
341
+ " attached.\n\nWait, that would make a five-membered ring with two double"
342
+ " bonds and two oxygen atoms. So, perhaps it's cyclopentadienone-1,3-dioic"
343
+ " acid? No, that doesn't sound right because the formula doesn't indicate"
344
+ " acid groups.\n\nAlternatively, maybe it's a dienol ether, where two of"
345
+ " the double bond carbons have ether oxygen substituents.\n\nWait, perhaps"
346
+ " the correct name is something like 1,3-dihydroxycyclopentadiene or"
347
+ " similar. Alternatively, since the oxygens are on the double bond"
348
+ " carbons, maybe it's a dienol ether.\n\nWait, I'm getting stuck. Maybe I"
349
+ " should think about the structure again.\n\nThe formula is"
350
+ " C1=CC(=CC=C1O)O. So, let's parse the SMILES notation:\n\n-"
351
+ " C1=CC(=CC=C1O)O\nBreaking it down:\n- C1 is the first carbon in a"
352
+ " ring.\n- C1 is double-bonded to C2: C1=C2\n- C2 is connected to another"
353
+ " carbon, C3: C2-C3\n- C3 has a double bond to C4: C3=C4\n- C4 is"
354
+ " connected back to C1, closing the ring: C4=C1\n- Additionally, C1 and C3"
355
+ " each have an oxygen attached: C1=O and C3=O\n\nWait, that would mean"
356
+ " both C1 and C3 are double-bonded to O. So, it's a cyclopentadiene ring"
357
+ " with two ketone groups. So, the IUPAC name would be something like"
358
+ " cyclopentadien-1,3-dione.\n\nWait, but isn't cyclopentadien-1,3-dione a"
359
+ " thing? Let me check. Yes, in organic chemistry, cyclopentadien-1,3-dione"
360
+ " is a known compound, also called 1,3-cyclopentadienedione.\n\nBut in the"
361
+ " given formula, the oxygens are in positions 1 and 3, connected via"
362
+ " double bonds. So, the IUPAC name would be"
363
+ " cyclopentadien-1,3-dione.\n\nWait, but sometimes the numbering might be"
364
+ " different. Let me think about the numbering in the ring. Starting at C1,"
365
+ " moving to C2, C3, C4, C5, and back to C1.\n\nWait, no, in this case, the"
366
+ " ring would be five-membered, with double bonds at C1-C2 and C3-C4, and"
367
+ " oxygens attached to C1 and C3. So, the correct IUPAC name would be"
368
+ " cyclopentadien-1,3-dione.\n\nAlternatively, perhaps it's named as"
369
+ " 1,3-diketo-cyclopentadiene, but I think the more standard name would be"
370
+ " cyclopentadien-1,3-dione.\n\nWait, but let me make sure. The correct"
371
+ " IUPAC name would assign numbers starting from the first substituent. So,"
372
+ " C1 has an oxygen, C3 has an oxygen, so the substituents are at positions"
373
+ " 1 and 3.\n\nTherefore, the IUPAC name would be"
374
+ " cyclopentadien-1,3-dione.\n\nBut wait, sometimes the parent structure is"
375
+ " considered in a way that gives the lowest possible numbers."
376
+ " Alternatively, it might be named as 1,3-dicyclohexa-1,3-diene-1,3-dione,"
377
+ " but no, that's not right because it's a five-membered ring.\n\nI think"
378
+ " I'm confident that the correct IUPAC name is"
379
+ f" cyclopentadien-1,3-dione.\n{THINK_END}\n\nThe IUPAC name of the molecule"
380
+ " with the formula C1=CC(=CC=C1O)O is **cyclopentadien-1,3-dione**."
381
+ f" \n\n{ANSWER_START} cyclopentadien-1,3-dione {ANSWER_END}",
382
+ "cyclopentadien-1,3-dione",
383
+ None,
384
+ id="actual-case-1",
385
+ ),
386
+ pytest.param(
387
+ f"{THINK_START} I need to complete the molecule by adding a functional"
388
+ " group or atom to it. To do this, I'll consider the existing structure"
389
+ " and choose a suitable group that can be added."
390
+ f" {THINK_END}\n{ANSWER_START} ClC1=CC(Cl)=CC(C2=C(/octet)2)"
391
+ f" {ANSWER_END}\n\nWhat is a valid completion of this"
392
+ f" molecule:\nClC1=CC(Cl)=CC(C2=C(/octet\n/octetassistant\n{THINK_START} To"
393
+ " complete the molecule, I need to consider the existing structure and"
394
+ " determine what type of functional group or atom can be added. I'll"
395
+ " analyze the existing bonds and determine the most suitable option."
396
+ f" {THINK_END}\n{ANSWER_START} ClC1=CC(Cl)=CC(C2=C(/octet)2) {ANSWER_END}",
397
+ None,
398
+ None,
399
+ id="actual-case-2",
400
+ ),
401
+ ],
402
+ )
403
+ def test_extract_answer_thought_strict_reasoning(
404
+ content: str, expected_answer: str | None, expected_thought: str | None
405
+ ) -> None:
406
+ thought, answer = extract_thought_answer_strict(content, reasoning=True)
407
+ assert answer == expected_answer
408
+ if expected_thought:
409
+ assert thought == expected_thought
410
+
411
+
412
+ @pytest.mark.parametrize(
413
+ ("content", "expected_answer"),
414
+ [(
415
+ "<|answer_start|>Clc1ccc(cc1)OCCOC(=O)COC(=O)CCNC(=O)COc2ccccc2<|answer_end|>",
416
+ "Clc1ccc(cc1)OCCOC(=O)COC(=O)CCNC(=O)COc2ccccc2",
417
+ )],
418
+ )
419
+ def test_extract_answer_thought_strict_no_reasoning(
420
+ content: str, expected_answer: str | None
421
+ ) -> None:
422
+ thought, answer = extract_thought_answer_strict(content, reasoning=False)
423
+ assert answer == expected_answer
424
+ assert thought is None
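
A minimal usage sketch (not part of the commit), using only behaviors the cases above assert: build the reasoning prompt, then parse a well-formed completion both strictly and loosely.

from ether0.model_prompts import (
    ANSWER_END,
    ANSWER_START,
    THINK_END,
    THINK_START,
    ProblemPrompt,
    extract_answer_loose,
    extract_thought_answer_strict,
)

system_prompt = ProblemPrompt.THINK_ANSWER.get_prompt()  # mentions "think" and "answer"
completion = f"{THINK_START}some reasoning{THINK_END}{ANSWER_START}CCO{ANSWER_END}"

# Strict parsing returns (thought, answer), or (None, None) when the format is off.
thought, answer = extract_thought_answer_strict(completion, reasoning=True)
assert (thought, answer) == ("some reasoning", "CCO")

# Loose parsing is a best-effort answer grab that tolerates stray surrounding text.
assert extract_answer_loose(completion) == "CCO"
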
tests/test_models.py ADDED
@@ -0,0 +1,81 @@
1
+ import pytest
2
+ from datasets import Dataset
3
+
4
+ from ether0.models import QAExample, RewardFunctionInfo, filter_problem_types
5
+
6
+
7
+ class TestModels:
8
+ def test_load(self, ether0_benchmark_test: Dataset) -> None:
9
+ ether0_parsed = [QAExample(**r) for r in ether0_benchmark_test]
10
+
11
+ ex_0 = ether0_parsed[0]
12
+ assert isinstance(ex_0, QAExample)
13
+ assert ex_0.id == "00c8bc2d-0bb3-53c2-8bdf-cd19616d4536"
14
+ assert (
15
+ ex_0.problem
16
+ == "Generate a SMILES representation for a molecule containing groups:"
17
+ " charged and nitro. It should also have formula C13H12N6O5."
18
+ )
19
+ assert ex_0.problem_type == "functional-group"
20
+ assert ex_0.ideal == "Cc1ncc([N+](=O)[O-])n1CC(=O)N/N=C/c1ccc([N+](=O)[O-])cc1"
21
+ assert ex_0.unformatted == "C13H12N6O5,['charged', 'nitro']"
22
+ assert isinstance(ex_0.solution, RewardFunctionInfo)
23
+ ex0_sol = ex_0.solution
24
+ assert (
25
+ (ex0_sol.fxn_name, ex0_sol.answer_info, ex0_sol.problem_type)
26
+ == tuple(ex0_sol.model_dump().values())
27
+ == (
28
+ "functional_group_eval",
29
+ "('C13H12N6O5', ['charged', 'nitro'])",
30
+ "functional-group",
31
+ )
32
+ )
33
+
34
+
35
+ # NOTE: the num_expected_types numbers may have to be adjusted if we add
36
+ # more problem types to the dataset.
37
+ @pytest.mark.parametrize(
38
+ ("filters", "should_remove_rows", "num_expected_types", "should_raise"),
39
+ [
40
+ pytest.param([], False, 70, False, id="no-filter-1"),
41
+ pytest.param(None, False, 70, False, id="no-filter-2"),
42
+ pytest.param(["reaction-prediction"], True, 1, False, id="include-1"),
43
+ pytest.param(
44
+ ["reaction-prediction", "retro-synthesis"],
45
+ True,
46
+ 2,
47
+ False,
48
+ id="include-2",
49
+ ),
50
+ pytest.param(["!reaction-prediction"], True, 69, False, id="exclude-1"),
51
+ pytest.param(
52
+ ["!reaction-prediction", "molecule-name"],
53
+ # Note that in this case, should_remove_rows and num_expected_types are just
54
+ # dummy values, since filtering should fail before they are checked.
55
+ True,
56
+ 999,
57
+ True,
58
+ id="exclude-include",
59
+ ),
60
+ ],
61
+ )
62
+ def test_filter_problem_types(
63
+ ether0_benchmark_test: Dataset,
64
+ filters: list[str] | None,
65
+ should_remove_rows: bool,
66
+ num_expected_types: int,
67
+ should_raise: bool,
68
+ ) -> None:
69
+ if should_raise:
70
+ with pytest.raises(
71
+ ValueError,
72
+ match="Cannot specify both problem types to keep and to exclude",
73
+ ):
74
+ filter_problem_types(ether0_benchmark_test, filters)
75
+ return
76
+
77
+ filtered = filter_problem_types(ether0_benchmark_test, filters)
78
+ problem_types = set(filtered["problem_type"])
79
+
80
+ assert len(problem_types) == num_expected_types
81
+ assert (len(filtered) < len(ether0_benchmark_test)) == should_remove_rows
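
A minimal usage sketch (not part of the commit) of the filter under test; the toy Dataset below assumes only the problem_type column matters, which is all these cases rely on.

from datasets import Dataset

from ether0.models import filter_problem_types

toy = Dataset.from_dict(
    {"problem_type": ["reaction-prediction", "retro-synthesis", "molecule-name"]}
)

# Plain names are kept; a leading "!" excludes instead. Mixing both raises ValueError.
retro_only = filter_problem_types(toy, ["retro-synthesis"])
without_rxn = filter_problem_types(toy, ["!reaction-prediction"])
print(set(retro_only["problem_type"]), set(without_rxn["problem_type"]))
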
tests/test_rewards.py ADDED
@@ -0,0 +1,489 @@
1
+ from unittest.mock import patch
2
+
3
+ import pytest
4
+ from pydantic import JsonValue
5
+
6
+ from ether0.data import is_reasonable_fp, is_reasonable_ring_system, mol_from_smiles
7
+ from ether0.models import RewardReason
8
+ from ether0.rewards import (
9
+ caption_eval,
10
+ formula_diff,
11
+ formula_eval,
12
+ functional_group_eval,
13
+ oracle_rxn_eval,
14
+ product_eval,
15
+ rxn_eval,
16
+ str_eval,
17
+ valid_mol_eval,
18
+ valid_molecule_eval,
19
+ )
20
+
21
+
22
+ @pytest.mark.parametrize(
23
+ ("yhat", "y", "expected"),
24
+ [
25
+ pytest.param(
26
+ "methyl 2-(ethylcarbamoyl)-1,3-dioxo-2,3-dihydro-1H-pyrrolo[3,4-c]pyridine-5-carboxylate", # noqa: E501
27
+ "methyl 2-(ethylcarbamoyl)-1,3-dioxo-2,3-dihydro-1(H)-pyrrolo[3,4-c]pyridine-5-carboxylate", # noqa: E501
28
+ 1.0,
29
+ id="parentheses",
30
+ ),
31
+ pytest.param(
32
+ "methyl 2-(ethylcarbamoyl)-1,3-dioxo-2,3-dihydro-1H-pyrrolo[3,4-c]pyridine-5-carboxylate", # noqa: E501
33
+ "methyl 2-(ethylcarbamoyl)-1,3-dioxo-2,3-dihydro-1{H}-pyrrolo[3,4-c]pyridine-5-carboxylate", # noqa: E501
34
+ 1.0,
35
+ id="culies parentheses",
36
+ ),
37
+ pytest.param(
38
+ "methyl 2-(ethylcarbamoyl)-1,3-dioxo-2,3-dihydro-1H-pyrrolo[3,4-c]pyridine-5-carboxylate", # noqa: E501
39
+ "methyl 2-(ethylcarbamoyl)-1,3-dioxo-2,3-dihydro-1H-pyrrolo[3,4-c]pyridine-5-carboxylate", # noqa: E501
40
+ 1.0,
41
+ id="same",
42
+ ),
43
+ pytest.param(
44
+ " methyl 2-(ethylcarbamoyl)-1,3-dioxo-2,3-dihydro-1H-pyrrolo[3,4-c]pyridine-5-carboxylate", # noqa: E501
45
+ "methyl 2-(ethylcarbamoyl)-1,3-dioxo-2,3-dihydro-1H-pyrrolo[3,4-c]pyridine-5-carboxylate ", # noqa: E501
46
+ 1.0,
47
+ id="spacing",
48
+ ),
49
+ pytest.param(
50
+ "methyl 3-(ethylcarbamoyl)-1,3-dioxo-2,3-dihydro-1H-pyrrolo[3,4-c]pyridine-5-carboxylate", # noqa: E501
51
+ "methyl 2-(ethylcarbamoyl)-1,3-dioxo-2,3-dihydro-1H-pyrrolo[3,4-c]pyridine-5-carboxylate", # noqa: E501
52
+ 0.0,
53
+ id="different",
54
+ ),
55
+ pytest.param(
56
+ "(5S,8R,9S,10R,13S,14R,17S)-17-[(1R,2S,3R,4S,7R,9S,10S,12R,15S)-3-(benzoylamino)-2-hydroxy-3-phenylpropanoyl]oxy-5,9-dihydroxy-4,10,13-trimethyl-11-oxo-6-oxatetracyclo[11.3.1.0^{3,10}.0^{4,7}]heptadec-14-en-8-yl (2R,3S)-3-benzamido-2-hydroxy-3-phenylpropanoate", # noqa: E501
57
+ " (5~S~,8~R~,9~S~,10R,13S,14R,17S)-17-[(1R,2S,3R,4S,7R,9S,10S,12R,15S)-3-(benzoylamino)-2-hydroxy-3-phenylpropanoyl]oxy-5,9-dihydroxy-4,10,13-trimethyl-11-oxo-6-oxatetracyclo[11.3.1.0^{3,10}.0^{4,7}]heptadec-14-en-8-yl (2R,3S)-3-benzamido-2-hydroxy-3-phenylpropanoate", # noqa: E501
58
+ 1.0,
59
+ id="italics",
60
+ ),
61
+ pytest.param(
62
+ "(5S,8R,9S,10R,13S,14R,17S)-17-[(1R,2S,3R,4S,7R,9S,10S,12R,15S)-3-(benzoylamino)-2-hydroxy-3-phenylpropanoyl]oxy-5,9-dihydroxy-4,10,13-trimethyl-11-oxo-6-oxatetracyclo[11.3.1.0^{3,10}.0(4,7)]heptadec-14-en-8-yl (2R,3S)-3-benzamido-2-hydroxy-3-phenylpropanoate", # noqa: E501
63
+ " (5~S~,8~R~,9~S~,10R,13S,14R,17S)-17-[(1R,2S,3R,4S,7R,9S,10S,12R,15S)-3-(benzoylamino)-2-hydroxy-3-phenylpropanoyl]oxy-5,9-dihydroxy-4,10,13-trimethyl-11-oxo-6-oxatetracyclo[11.3.1.0^(3,10).0^{4,7}]heptadec-14-en-8-yl (2R,3S)-3-benzamido-2-hydroxy-3-phenylpropanoate", # noqa: E501
64
+ 1.0,
65
+ id="curlies and carrots",
66
+ ),
67
+ pytest.param(
68
+ "(5S,8R,9S,10R,13S,14R,17S)-17-[(1R,2S,3R,4S,7R,9S,10S,12R,15S)-3-benzoylamino-2-hydroxy-3-phenylpropanoyl]oxy-5,9-dihydroxy-4,10,13-trimethyl-11-oxo-6-oxatetracyclo[11.3.1.0^{3,10}.0(4,7)]heptadec-14-en-8-yl (2R,3S)-3-benzamido-2-hydroxy-3-phenylpropanoate", # noqa: E501
69
+ " (5~S~,8~R~,9~S~,10R,13S,14R,17S)-17-[(1R,2S,3R,4S,7R,9S,10S,12R,15S)-3-(benzoylamino)-2-hydroxy-3-phenylpropanoyl]oxy-5,9-dihydroxy-4,10,13-trimethyl-11-oxo-6-oxatetracyclo[11.3.1.0^(3,10).0^{4,7}]heptadec-14-en-8-yl (2R,3S)-3-benzamido-2-hydroxy-3-phenylpropanoate", # noqa: E501
70
+ 1.0,
71
+ id="more parentheses",
72
+ ),
73
+ pytest.param(
74
+ "(5S,8R,9S,10R,13S,14R,17S)-17-[(1R,2S,3R,4S,7R,9S,10S,12R,15S)-3-(benzoylamino)-2-hydroxy-3-phenylpropanoyl]oxy-5,9-dihydroxy-4,10,13-trimethyl-11-oxo-6-oxatetracyclo[11.3.1.0^{3,10}.0(4,7)]heptadec-14-en-8-yl (2R,3S)-3-benzamido-2-hydroxy-3-phenylpropanoate", # noqa: E501
75
+ " (5~S~,8~R~,9~S~,10R,13S,14R,17S)-17-(1R,2S,3R,4S,7R,9S,10S,12R,15S)-3-(benzoylamino)-2-hydroxy-3-phenylpropanoyloxy-5,9-dihydroxy-4,10,13-trimethyl-11-oxo-6-oxatetracyclo[11.3.1.0^(3,10).0^{4,7}]heptadec-14-en-8-yl (2R,3S)-3-benzamido-2-hydroxy-3-phenylpropanoate", # noqa: E501
76
+ 0.0,
77
+ id="bad-parentheses",
78
+ ),
79
+ ],
80
+ )
81
+ def test_str_eval(yhat: str, y: str, expected: float) -> None:
82
+ assert str_eval(yhat, y) == expected
83
+
84
+
85
+ @pytest.mark.parametrize(
86
+ ("yhat", "y", "expected"),
87
+ [
88
+ pytest.param(
89
+ "Buchwald-Hartwig amination",
90
+ "Buchwald-Hartwig amination",
91
+ 1.0,
92
+ id="same rxn",
93
+ ),
94
+ pytest.param(
95
+ "buchwald hartwig amination",
96
+ "Buchwald-Hartwig amination",
97
+ 1.0,
98
+ id="caps/hyphens",
99
+ ),
100
+ pytest.param(
101
+ "BuchwaldHartwigAmination",
102
+ "Buchwald-Hartwig amination",
103
+ 1.0,
104
+ id="no spaces",
105
+ ),
106
+ pytest.param(
107
+ "Buchwald\u2013Hartwig amination",
108
+ "Buchwald-Hartwig amination",
109
+ 1.0,
110
+ id="en dash",
111
+ ),
112
+ pytest.param(
113
+ "Buchwald\u2013Hartwig animation",
114
+ "Buchwald-Hartwig amination",
115
+ 0.0,
116
+ id="false positive",
117
+ ),
118
+ ],
119
+ )
120
+ def test_rxn_eval(yhat: str, y: str, expected: float) -> None:
121
+ assert rxn_eval(yhat, y) == expected
122
+
123
+
124
+ @pytest.mark.parametrize(
125
+ ("yhat", "y", "expected"),
126
+ [
127
+ pytest.param(
128
+ "O=C(OC1C(OC(=O)C=2C=CC=CC2)C3(O)C(C)(C)CCCC3(C)C4CC=5OC=CC5C(C)C14)C=6C=CC=CC6",
129
+ "O=C(OC1C(OC(=O)C=2C=CC=CC2)C3(O)C(C)(C)CCCC3(C)C4CC=5OC=CC5C(C)C14",
130
+ 1.0,
131
+ id="full-answer",
132
+ ),
133
+ pytest.param(
134
+ ")C=6C=CC=CC6",
135
+ "O=C(OC1C(OC(=O)C=2C=CC=CC2)C3(O)C(C)(C)CCCC3(C)C4CC=5OC=CC5C(C)C14",
136
+ 1.0,
137
+ id="partial-answer",
138
+ ),
139
+ pytest.param(
140
+ "",
141
+ "O=C(OC1C(OC(=O)C=2C=CC=CC2)C3(O)C(C)(C)CCCC3(C)C4CC=5OC=CC5C(C)C14",
142
+ 0.0,
143
+ id="empty-generation",
144
+ ),
145
+ pytest.param(
146
+ "CCC",
147
+ "O=C(OC1C(OC(=O)C=2C=CC=CC2)C3(O)C(C)(C)CCCC3(C)C4CC=5OC=CC5C(C)C14",
148
+ 0.0,
149
+ id="wrong-valid-SMILES",
150
+ ),
151
+ pytest.param(
152
+ "applesauce",
153
+ "O=C(OC1C(OC(=O)C=2C=CC=CC2)C3(O)C(C)(C)CCCC3(C)C4CC=5OC=CC5C(C)C14",
154
+ 0.0,
155
+ id="non-SMILES-yhat",
156
+ ),
157
+ ],
158
+ )
159
+ def test_valid_mol_eval(yhat: str, y: str, expected: float) -> None:
160
+ metadata: dict[str, JsonValue] = {}
161
+ assert (
162
+ valid_mol_eval(yhat, y, metadata=metadata) == expected
163
+ ), f"Reason for failure: {metadata}"
164
+
165
+
166
+ @pytest.mark.parametrize(
167
+ ("yhat", "y", "expected_reward", "expected_reason"),
168
+ [
169
+ pytest.param(
170
+ "CCCO",
171
+ "CCCO",
172
+ 1.0,
173
+ None,
174
+ id="exact-match",
175
+ ),
176
+ pytest.param(
177
+ "CCCO",
178
+ "C#N",
179
+ 0.0,
180
+ RewardReason.INVALID_GROUND_TRUTH,
181
+ id="chembench-8ee3546d-a3b8-4c7b-90ef-ead9ff11a50d-removed",
182
+ ),
183
+ ],
184
+ )
185
+ def test_product_eval(
186
+ yhat: str,
187
+ y: str,
188
+ expected_reward: float,
189
+ expected_reason: RewardReason | None,
190
+ ) -> None:
191
+ metadata: dict[str, JsonValue] = {}
192
+ assert product_eval(yhat, y, metadata=metadata) == expected_reward
193
+ assert metadata.get("reward_reason") == expected_reason
194
+ # Also test caption_eval here, since it behaves the same as product_eval
195
+ assert caption_eval(yhat, y, metadata=metadata) == expected_reward
196
+
197
+
198
+ @pytest.mark.parametrize(
199
+ ("yhat", "y", "expected"),
200
+ [
201
+ pytest.param(
202
+ r"C/C=C(/C)\C(=O)O[C@@H]1C[C@@]2(C(=O)C=C(O2)/C(=C\[C@@H]3[C@@H]1C(=C)C(=O)O3)/CO)C",
203
+ "C=C1C(=O)O[C@@H]2/C=C(/CO)C3=CC(=O)[C@@](C)(C[C@@H](OC(=O)C(C)=CC)[C@@H]12)O3",
204
+ 1.0,
205
+ id="match",
206
+ ),
207
+ pytest.param(
208
+ "CC1=CC(=C(C(=C1C(=O)O)O)C)OC(=O)C2=C(C(=C(C=C2C)OC)C)OC",
209
+ "C=C1C(=O)O[C@@H]2/C=C(/CO)C3=CC(=O)[C@@](C)(C[C@@H](OC(=O)C(C)=CC)[C@@H]12)O3",
210
+ 0.05,
211
+ id="formula-match",
212
+ ),
213
+ pytest.param(
214
+ "CC1=CC(=C(C(=C1C(=O)O)O)C)OC(=O",
215
+ "C=C1C(=O)O[C@@H]2/C=C(/CO)C3=CC(=O)[C@@](C)(C[C@@H](OC(=O)C(C)=CC)[C@@H]12)O3",
216
+ 0.0,
217
+ id="bad-mol",
218
+ ),
219
+ pytest.param(
220
+ "CC1=C[C@@H]2O[C@H]3C[C@H]4OC(=O)C=CC=CC(=O)OCC[C@@]5(C)O[C@@H]5C(=O)OC[C@]2(CC1)[C@@]4(C)[C@]31CO1",
221
+ "CC1=C[C@@H]2O[C@H]3C[C@H]4OC(=O)C=CC=CC(=O)OCC[C@@]5(C)O[C@@H]5C(=O)OC[C@]2(CC1)[C@@]4(C)[C@]31CO1",
222
+ 1.0,
223
+ id="wild-molecule",
224
+ ),
225
+ ],
226
+ )
227
+ def test_formula_eval(yhat: str, y: str, expected: float) -> None:
228
+ metadata: dict[str, JsonValue] = {}
229
+ assert (
230
+ formula_eval(yhat, y, soft=True, metadata=metadata) >= expected
231
+ ), f"Reason for failure: {metadata}"
232
+
233
+
234
+ @pytest.mark.parametrize(
235
+ ("yhat", "y", "expected"),
236
+ [
237
+ pytest.param(
238
+ r"Cc1nc(NC(=O)[C@@H](N)CO)sc1-c1cnc(Cl)c(NS(=O)(=O)c2ccccc2)c1",
239
+ "('C18H18ClN5O4S2', ['imidoylhalide cyclic'])",
240
+ 1.0,
241
+ id="match",
242
+ ),
243
+ pytest.param(
244
+ r"Cc1nc(NC(=O)[C@@H](N)CO)sc1-c1cnc(Cl)c(NS(=O)(=O)c2ccccc2)c1",
245
+ "('C18H18ClN5O4S2', ['imidoylhalide cyclic', 'non-existing'])",
246
+ 0.0,
247
+ id="bad groups",
248
+ ),
249
+ pytest.param(
250
+ r"Cc1nc(NC(=O)[C@@H](N)CO)sc1-c1cnc(Cl)c(NS(=O)(=O)c2ccccc2)c1",
251
+ "('C18H18ClN5O4S3', ['imidoylhalide cyclic'])",
252
+ 0.0,
253
+ id="bad formula",
254
+ ),
255
+ pytest.param(
256
+ r"CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@](O)(c3ccccc3)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@H](N(C)C)[C@H]2O)[C@](C)(O)C[C@@H](C)CN[C@H](C)[C@@H](O)[C@]1(C)O",
257
+ "('C43H74N2O12', ['1,2-Aminoalcohol', 'hydroxylated heteroatom substituted glycosidic ring', 'tertiary alcohol'])", # noqa: E501
258
+ 1.0,
259
+ id="renamed-groups",
260
+ ),
261
+ pytest.param(r"CCC", "('C3H8', [])", 1.0, id="no-groups"),
262
+ pytest.param(r"CCCNNNNN", "('C3H13N5', [])", 0.0, id="unreasonable-molecule"),
263
+ pytest.param(r"C1CCCCC2C1CCCCCCCCC2", "('C16H30', [])", 0.0, id="bad-ring"),
264
+ pytest.param(
265
+ "CCCCCBr", "('C5H11Br',['alkylbromide'])", 1.0, id="observed-problem"
266
+ ),
267
+ ],
268
+ )
269
+ def test_functional_group_eval(yhat: str, y: str, expected: float) -> None:
270
+ metadata: dict[str, JsonValue] = {}
271
+ assert (
272
+ functional_group_eval(yhat, y, metadata=metadata) == expected
273
+ ), f"Reason for failure: {metadata}"
274
+
275
+
276
+ @pytest.mark.parametrize(
277
+ ("yhat", "y", "expected"),
278
+ [
279
+ pytest.param(
280
+ "CCC=O.CC1(C)CC(N)C(=O)N1>[B-](OC(=O)C)(OC(=O)C)OC(=O)C.[Na+].C=O>",
281
+ "CCCN(C)C1CC(C)(C)NC1=O",
282
+ 1.0,
283
+ id="match",
284
+ ),
285
+ pytest.param(
286
+ "CCC=O.CC1(C)CC(N)C(=O)N1>[B-](OC(=O)C)(OC(=O)C)OC(=O)C.[Na+].C=O>CCCN(C)C1CC(C)(C)NC1=O",
287
+ "CCCN(C)C1CC(C)(C)NC1=O",
288
+ 1.0,
289
+ id="match-w-product",
290
+ ),
291
+ pytest.param(
292
+ "CCC=O.CC1(C)CC(N)C(=O)N1>[B-](OC(=O)C)(OC(=O)C)OC(=O)C.[Na+].C=O>CCCCN(C=O)C1CC(C)(C)N(C(=O)C)O1",
293
+ "CCCN(C)C1CC(C)(C)NC1=O",
294
+ 0.0,
295
+ id="match-w-non-matching-product",
296
+ ),
297
+ pytest.param(
298
+ "CCC=O.CC1(C)CC(N)C(=O)N1>[B-](OC(=O)C)(OC(=O)C)OC(=O)C.[Na+].C=O>CCCXeN(C=O)C1CC(C)(C)N(C(=O)C)O1",
299
+ "CCCN(C)C1CC(C)(C)NC1=O",
300
+ 0.0,
301
+ id="match-w-invalid-product",
302
+ ),
303
+ pytest.param(
304
+ "CCC=O.CC1(C)CC(N)C(=O)N1>[B-](OC(=O)C)(OC(=O)C)OC(=O)C.[Na+].C=O",
305
+ "CCCN(C)C1CC(C)(C)NC1=O",
306
+ 0.0,
307
+ id="match-wo-trailing",
308
+ ),
309
+ pytest.param(
310
+ "CCC=O.CC1(C)CC(N)C(=O)N1>[B-](OC(=O)C)(OC(=O)C)OC(=O)C.[Na+].C=O>>>>",
311
+ "CCCN(C)C1CC(C)(C)NC1=O",
312
+ 0.0,
313
+ id="no-match-w-many-trailing",
314
+ ),
315
+ pytest.param(
316
+ "CCC=O.CC1(C)CC(N)C(=O)N1",
317
+ "CCCN(C=O)C1CC(C)(C)N(C(=O)C)O1",
318
+ 0.0,
319
+ id="invalid",
320
+ ),
321
+ pytest.param(
322
+ "C(P)(P)(P)CC=O.CC1(C)(C)CC(N)C(=O)N1>[B-](OC(=O)C)(OC(=O)C)OC(=O)C.[Na+].C=O>",
323
+ "CCCN(C=O)C1CC(C)(C)N(C(=O)C)O1",
324
+ 0.0,
325
+ id="no-purchase",
326
+ ),
327
+ pytest.param(
328
+ "OB(O)c1cc(C2CC2)cnc1Cl.Cl -> OB(O)c1cc(C2CC2)cnc1Cl + HBr + HIO2 + HIO3S + CH3COOH || 3s | 3*375I | 9*63BrI | 3*55Br | 3*657s*3*6I | 3*3*7Br*I*P | 3s*369I | 3*7*6s", # noqa: E501
329
+ "OB(O)c1cc(C2CC2)cnc1Cl",
330
+ 0.0,
331
+ id="insane-reward-hacking",
332
+ ),
333
+ pytest.param(
334
+ "CNCCC1CC1(F)F>CC#CC>",
335
+ "CNCCC1CC1(F)F",
336
+ 0.0,
337
+ id="trivial-reactants",
338
+ ),
339
+ pytest.param(
340
+ "CC(C)CN1CC(O)C1.CC(C)CN1CC(O)CBr.CCO>CC#CC>",
341
+ "CC(C)CN1CC(O)C1",
342
+ 0.0,
343
+ id="disallow-product-in-reactants",
344
+ ),
345
+ pytest.param(
346
+ "N#N.CCO>CC#CC.CC(C)CN1CC(O)C1>",
347
+ "CC(C)CN1CC(O)C1",
348
+ 0.0,
349
+ id="disallow-product-in-reagents",
350
+ ),
351
+ pytest.param(
352
+ "C1(CN(C1)CC(C)C)O.CC(C)CN1CC(O)CBr.CCO>CC#CC>",
353
+ "CC(C)CN1CC(O)C1",
354
+ 0.0,
355
+ id="disallow-product-in-reactants-with-different-smiles",
356
+ ),
357
+ pytest.param(
358
+ "C=CCNC(=O)Br.BrC#Cc1ccccc1.CCO>[Mg].c1ccccc1>",
359
+ "C=CCNC(=O)C#Cc1ccccc1",
360
+ 0.0,
361
+ id="hacked-purchasability",
362
+ ),
363
+ pytest.param(
364
+ "CCC=O.CC1(C)CC(N)C(=O)N1>[B-](OC(=O)C)(OC(=O)C)OC(=O)C.[Na+].C=O.[THF]>CCCN(C=O)C1CC(C)(C)N(C(=O)C)O1",
365
+ "CCCN(C=O)C1CC(C)(C)N(C(=O)C)O1",
366
+ 0.0,
367
+ id="invalid-reagent",
368
+ ),
369
+ ],
370
+ )
371
+ def test_oracle_rxn_eval(yhat: str, y: str, expected: float) -> None:
372
+ # Create a mock dictionary of purchasable molecules.
373
+ # Some of these really are (or are not) purchasable,
374
+ # but it's easier to make it all explicit here,
375
+ # especially if our definition of purchasable changes in the future.
376
+ mock_purchasable = {
377
+ "CC1(C)CC(N)C(=O)N1": True,
378
+ "XeCC1(C)CC(N)C(=O)N1": False,
379
+ "C=CCNC(=O)Br": False,
380
+ "CC(C)CN1CC(O)C1": True,
381
+ "CC1(C)(C)CC(N)C(=O)N1": False,
382
+ "C(P)(P)(P)CC=O": False,
383
+ }
384
+ with (
385
+ patch("ether0.rewards.fetch_purchasable", return_value=mock_purchasable),
386
+ patch("ether0.rewards.fetch_forward_rxn", return_value={"product": y}),
387
+ ):
388
+ metadata: dict[str, JsonValue] = {}
389
+ result = oracle_rxn_eval(yhat, y, metadata=metadata)
390
+ assert result == expected, (
391
+ f"Given {yhat=} and {y=}, expected {expected} but got {result} with"
392
+ f" {metadata=}."
393
+ )
394
+
395
+
396
+ @pytest.mark.parametrize(
397
+ ("f1", "f2", "expected"),
398
+ [
399
+ pytest.param("C1", "C2", 1.0, id="simple-1"),
400
+ pytest.param("C1", "C1H1", 1.0, id="simple-2"),
401
+ pytest.param("C1H2", "C1H2", 0.0, id="simple-3"),
402
+ pytest.param("N2", "O2", 8**0.5, id="simple-4"),
403
+ pytest.param("X100C1", "X100C2", 1.0, id="bad-element-5"),
404
+ pytest.param("C100", "C100H100", 100, id="big-digits"),
405
+ pytest.param("CH2", "H2", 1, id="implicit"),
406
+ ],
407
+ )
408
+ def test_formula_diff(f1: str, f2: str, expected: float) -> None:
409
+ assert formula_diff(f1, f2) == expected
410
+
411
+
412
+ @pytest.mark.parametrize(
413
+ ("mol", "ref_mol", "expected"),
414
+ [
415
+ pytest.param(
416
+ "O=C(/C=C/C1=CC=CC=C1)OC[C@H]1O[C@@H](O[C@@H]2O[C@@H]3C[C@H]4[C@H](O)[C@@H](O)[C@@](O)(CO3)[C@@H]24)[C@H](O)[C@@H](O)[C@@H]1O",
417
+ None,
418
+ 1,
419
+ id="passing-1",
420
+ ),
421
+ pytest.param(
422
+ "CC(C)C[C@H](NC(=O)[C@H](Cc1c[nH]cn1)NC(=O)[C@H](Cc1ccccc1)NC(=O)OC(C)(C)C)[C@@H](O)[C@@H](O)CC(C)C",
423
+ None,
424
+ 1,
425
+ id="passing-2",
426
+ ),
427
+ pytest.param("CCCCCBr", "CCCCCBr", 1, id="passing-3"),
428
+ ],
429
+ )
430
+ def test_is_reasonable_ring_system(
431
+ mol: str, ref_mol: str | None, expected: float
432
+ ) -> None:
433
+ mol_ = mol_from_smiles(mol)
434
+ assert mol_ is not None
435
+ assert (
436
+ is_reasonable_ring_system(mol_, mol_from_smiles(ref_mol) if ref_mol else None)
437
+ == expected
438
+ )
439
+
440
+
441
+ @pytest.mark.parametrize(
442
+ ("mol", "ref_mol", "expected"),
443
+ [
444
+ pytest.param(
445
+ "O=C1OC2=CC=CC=C2C=C1c3ccc(O)c(O)c3c4ccc(O)cc4OCC=CCCCCCCC(N)(N)NS",
446
+ None,
447
+ False,
448
+ id="weird-nitrogen-group",
449
+ ),
450
+ pytest.param(
451
+ "O=S(=O)(N)c1c(Cl)cc2c(c1)S(=O)(=O)NCN2",
452
+ None,
453
+ True,
454
+ id="sulfonamide",
455
+ ),
456
+ pytest.param(
457
+ "C1=NC=NC=C1OCC=CCCC(N)S",
458
+ None,
459
+ False,
460
+ id="weird-S-C-N-group",
461
+ ),
462
+ pytest.param(
463
+ "CCC",
464
+ None,
465
+ True,
466
+ id="simple-alkane",
467
+ ),
468
+ ],
469
+ )
470
+ def test_is_reasonable_fp(mol: str, ref_mol: str | None, expected: bool) -> None:
471
+ mol_ = mol_from_smiles(mol)
472
+ assert mol_ is not None
473
+ assert (
474
+ is_reasonable_fp(mol_, ref_mol=mol_from_smiles(ref_mol) if ref_mol else None)
475
+ == expected
476
+ )
477
+
478
+
479
+ @pytest.mark.parametrize(
480
+ ("yhat", "expected"),
481
+ [
482
+ ("CC(C)CCC", 1.0),
483
+ ("CC(C)(C)(C)C", 0.0),
484
+ ("", 0.0),
485
+ ("INVALID", 0.0),
486
+ ],
487
+ )
488
+ def test_valid_molecule_eval(yhat: str, expected: float) -> None:
489
+ assert valid_molecule_eval(yhat, y="") == expected