rizar001 commited on
Commit
12e023b
·
verified ·
1 Parent(s): b21cf25

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ imgs/diff_remask.gif filter=lfs diff=lfs merge=lfs -text
37
+ imgs/sample.png filter=lfs diff=lfs merge=lfs -text
38
+ imgs/transformer2.png filter=lfs diff=lfs merge=lfs -text
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
GUIDELINES.md ADDED
@@ -0,0 +1,140 @@
1
+ # Guidelines
2
+ Here, we provide guidelines for the model architecture, pre-training, SFT, and inference of LLaDA.
3
+
4
+ ## Model Architecture
5
+
6
+ LLaDA employs a Transformer Encoder as the network architecture for its mask predictor.
7
+ In terms of trainable parameters, the Transformer Encoder is identical to the Transformer
8
+ Decoder. Starting from an autoregressive model, we derive the backbone of LLaDA by simply
9
+ removing the causal mask from the self-attention mechanism, as shown below.
10
+
11
+ <div style="display: flex; justify-content: center; flex-wrap: wrap; gap: 50px;">
12
+ <img src="imgs/transformer1.png" style="width: 90%;" />
13
+ <img src="imgs/transformer2.png" style="width: 90%;" />
14
+ </div>
15
+
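+ As a minimal illustration (a sketch, not LLaDA's actual implementation), the only change to the
+ attention computation is dropping the causal mask:
+
+ ```python
+ import torch
+ import torch.nn.functional as F
+
+ def self_attention(q, k, v, causal=False):
+     # q, k, v: (batch, seq_len, d)
+     scores = q @ k.transpose(-2, -1) / (q.shape[-1] ** 0.5)
+     if causal:
+         # Autoregressive decoders mask out future positions.
+         l = q.shape[1]
+         future = torch.triu(torch.ones(l, l, dtype=torch.bool, device=q.device), diagonal=1)
+         scores = scores.masked_fill(future, float('-inf'))
+     # LLaDA's mask predictor simply skips this step, so every token
+     # attends to the whole sequence (bidirectional attention).
+     return F.softmax(scores, dim=-1) @ v
+ ```
+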
16
+ In addition, LLaDA designates a reserved token as the mask token (i.e., 126336).
17
+
18
+
19
+ ## Pre-training
20
+ The pre-training of LLaDA is straightforward. Starting from an existing
21
+ autoregressive training codebase, only a few lines need to be modified.
22
+ We provide the core code (i.e., the loss computation) below.
23
+
24
+ ```python
25
+ def forward_process(input_ids, eps=1e-3):
26
+ b, l = input_ids.shape
27
+ t = torch.rand(b, device=input_ids.device)
28
+ p_mask = (1 - eps) * t + eps
29
+ p_mask = p_mask[:, None].repeat(1, l)
30
+
31
+ masked_indices = torch.rand((b, l), device=input_ids.device) < p_mask
32
+ # 126336 is used for [MASK] token
33
+ noisy_batch = torch.where(masked_indices, 126336, input_ids)
34
+ return noisy_batch, masked_indices, p_mask
35
+
36
+ # The data is an integer tensor of shape (b, 4096),
37
+ # where b represents the batch size and 4096 is the sequence length.
38
+ input_ids = batch["input_ids"]
39
+
40
+ # We set 1% of the pre-training data to a random length that is uniformly sampled from the range [1, 4096].
41
+ # The following implementation is not elegant and involves some data waste.
42
+ # However, the data waste is minimal, so we ignore it.
43
+ if torch.rand(1) < 0.01:
44
+ random_length = torch.randint(1, input_ids.shape[1] + 1, (1,))
45
+ input_ids = input_ids[:, :random_length]
46
+
47
+ noisy_batch, masked_indices, p_mask = forward_process(input_ids)
48
+ logits = model(input_ids=noisy_batch).logits
49
+
50
+ token_loss = F.cross_entropy(logits[masked_indices], input_ids[masked_indices], reduction='none') / p_mask[masked_indices]
51
+ loss = token_loss.sum() / (input_ids.shape[0] * input_ids.shape[1])
52
+
53
+ ```
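+
+ As a quick sanity check, the snippet above can be exercised on random data (a sketch with
+ hypothetical toy shapes; the random logits stand in for a real model call):
+
+ ```python
+ import torch
+ import torch.nn.functional as F
+
+ b, l, vocab_size = 2, 16, 126464  # toy shapes; the vocabulary must cover the mask id 126336
+ input_ids = torch.randint(0, 100, (b, l))
+
+ noisy_batch, masked_indices, p_mask = forward_process(input_ids)
+ logits = torch.randn(b, l, vocab_size)  # stand-in for model(input_ids=noisy_batch).logits
+
+ token_loss = F.cross_entropy(logits[masked_indices], input_ids[masked_indices],
+                              reduction='none') / p_mask[masked_indices]
+ loss = token_loss.sum() / (b * l)
+ print(noisy_batch.shape, loss.item())
+ ```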
54
+
55
+ ## SFT
56
+ First, please refer to Appendix B.1 for the preprocessing of the SFT data. After preprocessing the data,
57
+ the data format is as follows. For simplicity, we treat each word as a token and set the batch size to 2
58
+ in the following visualization.
59
+ ```text
60
+ input_ids:
61
+ <BOS><start_id>user<end_id>\nWhat is the capital of France?<eot_id><start_id>assistant<end_id>\nParis.<EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS><EOS>
62
+ <BOS><start_id>user<end_id>\nWhat is the capital of Canada?<eot_id><start_id>assistant<end_id>\nThe capital of Canada is Ottawa, located in Ontario.<EOS>
63
+
64
+ prompt_lengths:
65
+ [17, 17]
66
+ ```
67
+
68
+ After preprocessing the SFT data, we can obtain the SFT code by making simple modifications to the pre-training code.
69
+ The key difference from pre-training is that SFT does not add noise to the prompt.
70
+ ```python
71
+ input_ids, prompt_lengths = batch["input_ids"], batch["prompt_lengths"]
72
+
73
+ noisy_batch, _, p_mask = forward_process(input_ids)
74
+
75
+ # Do not add noise to the prompt
76
+ token_positions = torch.arange(noisy_batch.shape[1], device=noisy_batch.device).expand(noisy_batch.size(0), noisy_batch.size(1))
77
+ prompt_mask = (token_positions < prompt_lengths.unsqueeze(1))
78
+ noisy_batch[prompt_mask] = input_ids[prompt_mask]
79
+
80
+ # Calculate the answer length (including the padded <EOS> tokens)
81
+ prompt_mask = prompt_mask.to(torch.int64)
82
+ answer_lengths = torch.sum((1 - prompt_mask), dim=-1, keepdim=True)
83
+ answer_lengths = answer_lengths.repeat(1, noisy_batch.shape[1])
84
+
85
+ masked_indices = (noisy_batch == 126336)
86
+
87
+ logits = model(input_ids=noisy_batch).logits
88
+
89
+ token_loss = F.cross_entropy(logits[masked_indices], input_ids[masked_indices], reduction='none') / p_mask[masked_indices]
90
+ ce_loss = torch.sum(token_loss / answer_lengths[masked_indices]) / input_ids.shape[0]
91
+ ```
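+
+ For reference, a minimal sketch of how `prompt_lengths` could be computed during preprocessing
+ (a hypothetical helper; it assumes the prompt ends right after the assistant header, as in the
+ chat template shown above):
+
+ ```python
+ def get_prompt_length(tokenizer, user_message):
+     # Token count of everything up to and including "<start_id>assistant<end_id>\n".
+     conversation = [{"role": "user", "content": user_message}]
+     prompt = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
+     return len(tokenizer(prompt)['input_ids'])
+ ```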
92
+
93
+ ## Sampling
94
+ Overall, we categorize LLaDA's sampling process into three types: fixed-length, semi-autoregressive-origin, and semi-autoregressive-padding.
95
+ **It is worth noting that the semi-autoregressive-origin method was not mentioned in our paper, nor did we provide the corresponding code**.
96
+ However, we include it here because we believe that sharing both our failures and insights from the exploration process is valuable.
97
+ These three sampling methods are illustrated in the figure below.
98
+
99
+
100
+ <div style="display: flex; justify-content: center; flex-wrap: wrap; gap: 50px;">
101
+ <img src="imgs/sample.png" style="width: 100%;" />
102
+ </div>
103
+
104
+ For each step in the above three sampling processes, as detailed in Section 2.4 in our paper, the mask predictor
105
+ first predicts all masked tokens simultaneously. Then, a certain proportion of these predictions are remasked.
106
+ To determine which predicted tokens should be remasked, we can adopt two strategies: *random remasking* or
107
+ *low-confidence remasking*. Notably, both remasking strategies can be applied to all three sampling processes
108
+ mentioned above.
109
+
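+ As a concrete reference, a minimal sketch of one such step with low-confidence remasking
+ (mirroring `generate.py` in this repository; `num_transfer` is the number of predictions to keep
+ unmasked at this step):
+
+ ```python
+ import torch
+ import torch.nn.functional as F
+
+ def sampling_step(model, x, num_transfer, mask_id=126336):
+     # The mask predictor predicts all masked tokens simultaneously.
+     mask_index = (x == mask_id)
+     logits = model(x).logits
+     x0 = torch.argmax(logits, dim=-1)
+
+     # Confidence of each prediction; only the most confident ones are kept.
+     p = F.softmax(logits.to(torch.float64), dim=-1)
+     conf = torch.gather(p, -1, x0.unsqueeze(-1)).squeeze(-1)
+     conf = conf.masked_fill(~mask_index, float('-inf'))
+
+     transfer = torch.zeros_like(mask_index)
+     for j in range(x.shape[0]):
+         _, idx = torch.topk(conf[j], k=num_transfer)
+         transfer[j, idx] = True
+
+     # All other predictions are remasked (i.e., stay masked) for the next step.
+     return torch.where(transfer, x0, x)
+ ```
+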
110
+ For the LLaDA-Base model, we apply low-confidence remasking to the three sampling processes mentioned above.
111
+ We find that fixed-length and semi-autoregressive-padding achieve similar results, whereas semi-autoregressive-origin
112
+ performs slightly worse.
113
+
114
+ For the LLaDA-Instruct model, the situation is slightly more complex.
115
+
116
+ First, if the semi-autoregressive-origin method is used,
117
+ the Instruct model performs poorly. This is because, during SFT, each sequence is a complete sentence (whereas in pre-training,
118
+ many sequences are truncated sentences). As a result, during sampling, given a generated length, regardless of whether it is
119
+ long or short, the Instruct model tends to generate a complete sentence. Unlike the Base model, it does not encounter cases
120
+ where a sentence is only partially generated and needs to be continued.
121
+
122
+ When performing fixed-length sampling with a high answer length (e.g., greater than 512),
123
+ we find that low-confidence remasking results in an unusually high proportion of `<EOS>` tokens in
124
+ the generated sentences, which severely impacts the model's performance. In contrast, this
125
+ issue does not arise when random remasking is used.
126
+
127
+ Furthermore, since low-confidence remasking achieved better results in the Base model, we also hoped that it could be applied to
128
+ the Instruct model. We found that combining low-confidence remasking with semi-autoregressive-padding effectively mitigates
129
+ the issue of generating an excessively high proportion of `<EOS>` tokens. Moreover, this combination achieves
130
+ slightly better results than random remasking with fixed-length sampling.
131
+
132
+ You can find more details about the sampling method in our paper.
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 NieShenRuc
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,164 @@
1
  ---
2
  title: LLaDA
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 5.20.1
8
- app_file: app.py
9
- pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: LLaDA
3
+ app_file: app.py
4
  sdk: gradio
5
  sdk_version: 5.20.1
6
  ---
7
+ # Large Language Diffusion Models
8
+ [![arXiv](https://img.shields.io/badge/arXiv-2502.09992-red.svg)](https://arxiv.org/abs/2502.09992)
9
+ [![deploy](https://img.shields.io/badge/Huggingface%20-LLaDA_Base%20-FFEB3B)](https://huggingface.co/GSAI-ML/LLaDA-8B-Base)
10
+ [![deploy](https://img.shields.io/badge/Huggingface%20-LLaDA_Instruct%20-FFEB3B)](https://huggingface.co/GSAI-ML/LLaDA-8B-Instruct)
11
+ [![deploy](https://img.shields.io/badge/Zhihu-知乎-blue)](https://zhuanlan.zhihu.com/p/24214732238)
12
+
13
+ We introduce LLaDA (<b>L</b>arge <b>La</b>nguage <b>D</b>iffusion with m<b>A</b>sking), a diffusion model with an unprecedented 8B scale, trained entirely from scratch,
14
+ rivaling LLaMA3 8B in performance.
15
+
16
+ <div style="display: flex; justify-content: center; flex-wrap: wrap;">
17
+ <img src="./imgs/LLaDA_vs_LLaMA.svg" style="width: 45%" />
18
+ <img src="./imgs/LLaDA_vs_LLaMA_chat.svg" style="width: 46%" />
19
+ </div>
20
+
21
+
22
+ ## Inference
23
+ The [LLaDA-8B-Base](https://huggingface.co/GSAI-ML/LLaDA-8B-Base) and [LLaDA-8B-Instruct](https://huggingface.co/GSAI-ML/LLaDA-8B-Instruct) models are available
24
+ on Hugging Face. Please first install `transformers==4.38.2` and use the [transformers](https://huggingface.co/docs/transformers/index) library to load them.
25
+
26
+ ```python
27
+ import torch
+ from transformers import AutoModel, AutoTokenizer
28
+
29
+ tokenizer = AutoTokenizer.from_pretrained('GSAI-ML/LLaDA-8B-Base', trust_remote_code=True)
30
+ model = AutoModel.from_pretrained('GSAI-ML/LLaDA-8B-Base', trust_remote_code=True, torch_dtype=torch.bfloat16)
31
+ ```
32
+
33
+ We provide `get_log_likelihood()` and `generate()` functions in `get_log_likelihood.py`
34
+ and `generate.py` respectively, for conditional likelihood evaluation and conditional generation.
35
+
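+ For example, conditional generation with the Instruct model (a sketch assuming the model and
+ tokenizer are loaded as above, using the Instruct checkpoints; the prompt is illustrative):
+
+ ```python
+ from generate import generate
+
+ m = [{"role": "user", "content": "What is the capital of France?"}]
+ prompt = tokenizer.apply_chat_template(m, add_generation_prompt=True, tokenize=False)
+ input_ids = torch.tensor(tokenizer(prompt)['input_ids']).to(model.device).unsqueeze(0)
+
+ out = generate(model, input_ids, steps=128, gen_length=128, block_length=32,
+                temperature=0., cfg_scale=0., remasking='low_confidence')
+ print(tokenizer.batch_decode(out[:, input_ids.shape[1]:], skip_special_tokens=True)[0])
+ ```
+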
36
+ You can directly run `python chat.py` to have multi-round conversations with LLaDA-8B-Instruct.
37
+
38
+ In addition, please refer to our paper and [GUIDELINES.md](GUIDELINES.md) for more details about the inference methods.
39
+
40
+
41
+ ## Pre-training and Supervised Fine-Tuning
42
+
43
+ Following the practice of most open-source LLMs, we will not provide the training framework or data.
44
+
45
+ However, the pre-training and Supervised Fine-Tuning of LLaDA are straightforward. If
46
+ you have a codebase for training an autoregressive model, you can modify it to
47
+ adapt to LLaDA with just a few lines of code.
48
+
49
+ We provide guidelines for the pre-training and SFT of LLaDA in [GUIDELINES.md](GUIDELINES.md).
50
+ You can also refer to [SMDM](https://github.com/ML-GSAI/SMDM), which has a similar training process to LLaDA
51
+ and has open-sourced the training framework.
52
+
53
+
54
+ ## FAQ
55
+ Here, we address some common questions about LLaDA.
56
+
57
+ ### 0. How do I train my own LLaDA?
58
+ Please refer to [GUIDELINES.md](GUIDELINES.md) for the guidelines.
59
+ You can also refer to [SMDM](https://github.com/ML-GSAI/SMDM), which follows the same training
60
+ process as LLaDA and has open-sourced its code.
61
+
62
+
63
+ ### 1. What is the difference between LLaDA and BERT?
64
+
65
+ Our motivation is not to improve BERT, nor to apply image generation methods like [MaskGIT](https://arxiv.org/abs/2202.04200)
66
+ to text. **Our goal is to explore a theoretically complete language modeling approach — masked diffusion models.**
67
+ During this process, we simplified the approach and discovered that the loss function of masked diffusion models
68
+ is related to the loss functions of BERT and MaskGIT. You can find our theoretical research process in Question 7.
69
+
70
+ Specifically, LLaDA employs a masking ratio that varies randomly between 0 and 1, while BERT uses
71
+ a fixed ratio. This subtle difference has significant implications. **The training
72
+ objective of LLaDA is an upper bound on the negative log-likelihood of the model
73
+ distribution, making LLaDA a generative model.** This enables LLaDA to naturally
74
+ perform in-context learning, instruction-following, and ensures Fisher consistency
75
+ for scalability with large datasets and models. You can also find a direct answer
76
+ to this question in Section 2.1 of our paper.
77
+
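+ In code terms, the difference is schematic (a toy comparison, not either model's actual implementation):
+
+ ```python
+ import torch
+ b, l = 2, 16  # toy shapes
+
+ # BERT-style: a fixed masking ratio (e.g., 15%) for every sequence.
+ p_mask_bert = torch.full((b, l), 0.15)
+
+ # LLaDA-style: the ratio t is sampled uniformly per sequence, varying between 0 and 1.
+ t = torch.rand(b)
+ p_mask_llada = t[:, None].repeat(1, l)
+ ```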
78
+
79
+ ### 2. What is the relationship between LLaDA and Transformer?
80
+ Network structure and probabilistic modeling are two distinct approaches that collectively form the
81
+ foundation of language models. LLaDA, like GPT, adopts the
82
+ Transformer architecture. The key difference lies in the probabilistic modeling approach: GPT
83
+ utilizes an autoregressive next-token prediction method,
84
+ while LLaDA employs a diffusion model for probabilistic modeling.
85
+
86
+
87
+ ### 3. What is the sampling efficiency of LLaDA?
88
+ Currently, LLaDA's sampling speed is slower than the autoregressive baseline for three reasons:
89
+ 1. LLaDA samples with a fixed context length;
90
+ 2. LLaDA cannot yet leverage techniques like KV-Cache;
91
+ 3. LLaDA achieves optimal performance when the number of sampling steps equals the response length.
92
+ Reducing the number of sampling steps leads to a decrease in performance, as detailed in Appendix B.4
93
+ and Appendix B.6 of our paper.
94
+
95
+ In this work, we aim to explore the upper limits of LLaDA's capabilities, **challenging the assumption
96
+ that the key LLM abilities are inherently tied to autoregressive models**. We will continue
97
+ to optimize its efficiency in the future. We believe this research approach is reasonable,
98
+ as verifying the upper limits of diffusion language models' capabilities will provide us with
99
+ more resources and sufficient motivation to optimize efficiency.
100
+
101
+ Recall the development of diffusion models for images, from [DDPM](https://arxiv.org/abs/2006.11239)
102
+ to the [Consistency model](https://arxiv.org/pdf/2410.11081), where sampling speed accelerated nearly
103
+ 1000 times over the course of 4 years. **We believe there is significant room for optimization in LLaDA's
104
+ sampling efficiency as well**. Current solutions, including semi-autoregressive sampling (as
105
+ detailed in [GUIDELINES.md](GUIDELINES.md)), can mitigate the fixed context length issue, and
106
+ [consistency distillation](https://arxiv.org/pdf/2502.05415) can reduce the number of sampling steps.
107
+
108
+
109
+ ### 4. What is the training stability of LLaDA?
110
+ For details on the pre-training process of LLaDA, please refer to Section 2.2 of our paper.
111
+ During the total pre-training on 2.3T tokens, we encountered a training crash (loss becoming NaN)
112
+ only once, at 1.2T tokens. Our solution was to resume from an earlier checkpoint and reduce
113
+ the learning rate from 4e-4 to 1e-4.
114
+
115
+
116
+ ### 5. Why is the final answer "72" generated earlier than the intermediate calculation step (e.g., 12 × 4 = 48) in Table 4?
117
+
118
+ **The mask predictor has successfully predicted the reasoning process. However, during the
119
+ remasking process, the reasoning steps are masked out again.** As shown in the figure
120
+ below, the non-white background represents the model's generation process, while the
121
+ white-background boxes indicate the predictions made by the mask predictor at each step.
122
+ We adopt a random remasking strategy.
123
+
124
+ <div style="display: flex; justify-content: center; flex-wrap: wrap;">
125
+ <img src="./imgs/diff_remask.gif" style="width: 80%" />
126
+ </div>
127
+
128
+ ### 6. Why does LLaDA answer 'Bailing' when asked 'Who are you'?
129
+ This is because our pre-training and SFT data were originally designed for training an autoregressive model,
130
+ and LLaDA directly reuses this data, which contains these identity markers.
131
+
132
+
133
+ ### 7. What was our journey in developing LLaDA?
134
+ LLaDA is built upon our two prior works, [RADD](https://arxiv.org/abs/2406.03736) and
135
+ [SMDM](https://arxiv.org/abs/2410.18514).
136
+
137
+ RADD demonstrated that the **training objective of LLaDA serves as an upper bound on the negative
138
+ log-likelihood** of the model’s distribution, a conclusion also supported by [MD4](https://arxiv.org/abs/2406.04329)
139
+ and [MDLM](https://arxiv.org/abs/2406.07524).
140
+ Furthermore, RADD was the first to theoretically prove that **masked diffusion models do not require time t
141
+ as an input to the Transformer**. This insight provides the theoretical
142
+ justification for LLaDA’s unmodified use of the Transformer architecture. Lastly,
143
+ RADD showed that **the training objective of masked diffusion models is equivalent to that of
144
+ any-order autoregressive models**, offering valuable insights into how masked diffusion models can
145
+ overcome the reversal curse.
146
+
147
+ SMDM introduces the first **scaling law** for masked diffusion models and demonstrates that, with the
148
+ same model size and training data, masked diffusion models can achieve downstream benchmark results
149
+ on par with those of autoregressive models. Additionally, SMDM presents a simple, **unsupervised
150
+ classifier-free guidance** method that greatly improves downstream benchmark performance, which has
151
+ been adopted by LLaDA.
152
+
153
+
154
+ ## Citation
155
+
156
+ ```bibtex
157
+ @article{nie2025large,
158
+ title={Large Language Diffusion Models},
159
+ author={Nie, Shen and Zhu, Fengqi and You, Zebin and Zhang, Xiaolu and Ou, Jingyang and Hu, Jun and Zhou, Jun and Lin, Yankai and Wen, Ji-Rong and Li, Chongxuan},
160
+ journal={arXiv preprint arXiv:2502.09992},
161
+ year={2025}
162
+ }
163
+ ```
164
 
 
__pycache__/generate.cpython-310.pyc ADDED
Binary file (4.47 kB). View file
 
app.py ADDED
@@ -0,0 +1,510 @@
1
+ import torch
2
+ import numpy as np
3
+ import gradio as gr
4
+ import torch.nn.functional as F
5
+ from transformers import AutoTokenizer, AutoModel
6
+ import time
7
+ import re
8
+
9
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
10
+ print(f"Using device: {device}")
11
+
12
+ # Load model and tokenizer
13
+ tokenizer = AutoTokenizer.from_pretrained('GSAI-ML/LLaDA-8B-Instruct', trust_remote_code=True)
14
+ model = AutoModel.from_pretrained('GSAI-ML/LLaDA-8B-Instruct', trust_remote_code=True,
15
+ torch_dtype=torch.bfloat16).to(device)
16
+
17
+ # Constants
18
+ MASK_TOKEN = "[MASK]"
19
+ MASK_ID = 126336 # The token ID of [MASK] in LLaDA
20
+
21
+ def parse_constraints(constraints_text):
22
+ """Parse constraints in format: 'position:word, position:word, ...'"""
23
+ constraints = {}
24
+ if not constraints_text:
25
+ return constraints
26
+
27
+ parts = constraints_text.split(',')
28
+ for part in parts:
29
+ if ':' not in part:
30
+ continue
31
+ pos_str, word = part.split(':', 1)
32
+ try:
33
+ pos = int(pos_str.strip())
34
+ word = word.strip()
35
+ if word and pos >= 0:
36
+ constraints[pos] = word
37
+ except ValueError:
38
+ continue
39
+
40
+ return constraints
41
+
42
+ def format_chat_history(history):
43
+ """
44
+ Format chat history for the LLaDA model
45
+
46
+ Args:
47
+ history: List of [user_message, assistant_message] pairs
48
+
49
+ Returns:
50
+ Formatted conversation for the model
51
+ """
52
+ messages = []
53
+ for user_msg, assistant_msg in history:
54
+ messages.append({"role": "user", "content": user_msg})
55
+ if assistant_msg: # Skip if None (for the latest user message)
56
+ messages.append({"role": "assistant", "content": assistant_msg})
57
+
58
+ return messages
59
+
60
+ def add_gumbel_noise(logits, temperature):
61
+ '''
62
+ The Gumbel max is a method for sampling categorical distributions.
63
+ According to arXiv:2409.02908, for MDM, low-precision Gumbel Max improves perplexity score but reduces generation quality.
64
+ Thus, we use float64.
65
+ '''
66
+ if temperature <= 0:
67
+ return logits
68
+
69
+ logits = logits.to(torch.float64)
70
+ noise = torch.rand_like(logits, dtype=torch.float64)
71
+ gumbel_noise = (- torch.log(noise)) ** temperature
72
+ return logits.exp() / gumbel_noise
73
+
74
+ def get_num_transfer_tokens(mask_index, steps):
75
+ '''
76
+ In the reverse process, the interval [0, 1] is uniformly discretized into steps intervals.
77
+ Furthermore, because LLaDA employs a linear noise schedule (as defined in Eq. (8)),
78
+ the expected number of tokens transitioned at each step should be consistent.
79
+ This function is designed to precompute the number of tokens that need to be transitioned at each step.
80
+ '''
81
+ mask_num = mask_index.sum(dim=1, keepdim=True)
82
+
83
+ base = mask_num // steps
84
+ remainder = mask_num % steps
85
+
86
+ num_transfer_tokens = torch.zeros(mask_num.size(0), steps, device=mask_index.device, dtype=torch.int64) + base
87
+
88
+ for i in range(mask_num.size(0)):
89
+ num_transfer_tokens[i, :remainder[i]] += 1
90
+
91
+ return num_transfer_tokens
92
+
93
+ def generate_response_with_visualization(messages, gen_length=64, steps=32,
94
+ constraints=None, temperature=0.0, cfg_scale=0.0, block_length=32,
95
+ remasking='low_confidence'):
96
+ """
97
+ Generate text with LLaDA model with visualization using the same sampling as in generate.py
98
+
99
+ Args:
100
+ messages: List of message dictionaries with 'role' and 'content'
101
+ gen_length: Length of text to generate
102
+ steps: Number of denoising steps
103
+ constraints: Dictionary mapping positions to words
104
+ temperature: Sampling temperature
105
+ cfg_scale: Classifier-free guidance scale
106
+ block_length: Block length for semi-autoregressive generation
107
+ remasking: Remasking strategy ('low_confidence' or 'random')
108
+
109
+ Returns:
110
+ List of visualization states showing the progression and final text
111
+ """
112
+
113
+ # Process constraints
114
+ if constraints is None:
115
+ constraints = {}
116
+
117
+ # Convert any string constraints to token IDs
118
+ processed_constraints = {}
119
+ for pos, word in constraints.items():
120
+ tokens = tokenizer.encode(" " + word, add_special_tokens=False)
121
+ for i, token_id in enumerate(tokens):
122
+ processed_constraints[pos + i] = token_id
123
+
124
+ # Prepare the prompt using chat template
125
+ chat_input = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
126
+ input_ids = tokenizer(chat_input)['input_ids']
127
+ input_ids = torch.tensor(input_ids).to(device).unsqueeze(0)
128
+
129
+ # For generation
130
+ prompt_length = input_ids.shape[1]
131
+
132
+ # Initialize the sequence with masks for the response part
133
+ x = torch.full((1, prompt_length + gen_length), MASK_ID, dtype=torch.long).to(device)
134
+ x[:, :prompt_length] = input_ids.clone()
135
+
136
+ # Initialize visualization states for the response part
137
+ visualization_states = []
138
+
139
+ # Add initial state (all masked)
140
+ initial_state = [(MASK_TOKEN, "#444444") for _ in range(gen_length)]
141
+ visualization_states.append(initial_state)
142
+
143
+ # Apply constraints to the initial state
144
+ for pos, token_id in processed_constraints.items():
145
+ absolute_pos = prompt_length + pos
146
+ if absolute_pos < x.shape[1]:
147
+ x[:, absolute_pos] = token_id
148
+
149
+ # Mark prompt positions to exclude them from masking during classifier-free guidance
150
+ prompt_index = (x != MASK_ID)
151
+
152
+ # Ensure block_length is valid
153
+ if block_length > gen_length:
154
+ block_length = gen_length
155
+
156
+ # Calculate number of blocks
157
+ num_blocks = gen_length // block_length
158
+ if gen_length % block_length != 0:
159
+ num_blocks += 1
160
+
161
+ # Adjust steps per block
162
+ steps_per_block = steps // num_blocks
163
+ if steps_per_block < 1:
164
+ steps_per_block = 1
165
+
166
+ # Track the current state of x for visualization
167
+ current_x = x.clone()
168
+
169
+ # Process each block
170
+ for num_block in range(num_blocks):
171
+ # Calculate the start and end indices for the current block
172
+ block_start = prompt_length + num_block * block_length
173
+ block_end = min(prompt_length + (num_block + 1) * block_length, x.shape[1])
174
+
175
+ # Get mask indices for the current block
176
+ block_mask_index = (x[:, block_start:block_end] == MASK_ID)
177
+
178
+ # Skip if no masks in this block
179
+ if not block_mask_index.any():
180
+ continue
181
+
182
+ # Calculate number of tokens to unmask at each step
183
+ num_transfer_tokens = get_num_transfer_tokens(block_mask_index, steps_per_block)
184
+
185
+ # Process each step
186
+ for i in range(steps_per_block):
187
+ # Get all mask positions in the current sequence
188
+ mask_index = (x == MASK_ID)
189
+
190
+ # Skip if no masks
191
+ if not mask_index.any():
192
+ break
193
+
194
+ # Apply classifier-free guidance if enabled
195
+ if cfg_scale > 0.0:
196
+ un_x = x.clone()
197
+ un_x[prompt_index] = MASK_ID
198
+ x_ = torch.cat([x, un_x], dim=0)
199
+ logits = model(x_).logits
200
+ logits, un_logits = torch.chunk(logits, 2, dim=0)
201
+ logits = un_logits + (cfg_scale + 1) * (logits - un_logits)
202
+ else:
203
+ logits = model(x).logits
204
+
205
+ # Apply Gumbel noise for sampling
206
+ logits_with_noise = add_gumbel_noise(logits, temperature=temperature)
207
+ x0 = torch.argmax(logits_with_noise, dim=-1)
208
+
209
+ # Calculate confidence scores for remasking
210
+ if remasking == 'low_confidence':
211
+ p = F.softmax(logits.to(torch.float64), dim=-1)
212
+ x0_p = torch.squeeze(
213
+ torch.gather(p, dim=-1, index=torch.unsqueeze(x0, -1)), -1) # b, l
214
+ elif remasking == 'random':
215
+ x0_p = torch.rand((x0.shape[0], x0.shape[1]), device=x0.device)
216
+ else:
217
+ raise NotImplementedError(f"Remasking strategy '{remasking}' not implemented")
218
+
219
+ # Don't consider positions beyond the current block
220
+ x0_p[:, block_end:] = -float('inf')
221
+
222
+ # Apply predictions where we have masks
223
+ old_x = x.clone()
224
+ x0 = torch.where(mask_index, x0, x)
225
+ confidence = torch.where(mask_index, x0_p, -float('inf'))
226
+
227
+ # Select tokens to unmask based on confidence
228
+ transfer_index = torch.zeros_like(x0, dtype=torch.bool, device=x0.device)
229
+ for j in range(confidence.shape[0]):
230
+ # Only consider positions within the current block for unmasking
231
+ block_confidence = confidence[j, block_start:block_end]
232
+ if i < steps_per_block - 1: # Not the last step
233
+ # Take top-k confidences
234
+ _, select_indices = torch.topk(block_confidence,
235
+ k=min(num_transfer_tokens[j, i].item(),
236
+ block_confidence.numel()))
237
+ # Adjust indices to global positions
238
+ select_indices = select_indices + block_start
239
+ transfer_index[j, select_indices] = True
240
+ else: # Last step - unmask everything remaining
241
+ transfer_index[j, block_start:block_end] = mask_index[j, block_start:block_end]
242
+
243
+ # Apply the selected tokens
244
+ x = torch.where(transfer_index, x0, x)
245
+
246
+ # Ensure constraints are maintained
247
+ for pos, token_id in processed_constraints.items():
248
+ absolute_pos = prompt_length + pos
249
+ if absolute_pos < x.shape[1]:
250
+ x[:, absolute_pos] = token_id
251
+
252
+ # Create visualization state only for the response part
253
+ current_state = []
254
+ for i in range(gen_length):
255
+ pos = prompt_length + i # Absolute position in the sequence
256
+
257
+ if x[0, pos] == MASK_ID:
258
+ # Still masked
259
+ current_state.append((MASK_TOKEN, "#444444")) # Dark gray for masks
260
+
261
+ elif old_x[0, pos] == MASK_ID:
262
+ # Newly revealed in this step
263
+ token = tokenizer.decode([x[0, pos].item()], skip_special_tokens=True)
264
+ # Color based on confidence
265
+ confidence = float(x0_p[0, pos].cpu())
266
+ if confidence < 0.3:
267
+ color = "#FF6666" # Light red
268
+ elif confidence < 0.7:
269
+ color = "#FFAA33" # Orange
270
+ else:
271
+ color = "#66CC66" # Light green
272
+
273
+ current_state.append((token, color))
274
+
275
+ else:
276
+ # Previously revealed
277
+ token = tokenizer.decode([x[0, pos].item()], skip_special_tokens=True)
278
+ current_state.append((token, "#6699CC")) # Light blue
279
+
280
+ visualization_states.append(current_state)
281
+
282
+ # Extract final text (just the assistant's response)
283
+ response_tokens = x[0, prompt_length:]
284
+ final_text = tokenizer.decode(response_tokens,
285
+ skip_special_tokens=True,
286
+ clean_up_tokenization_spaces=True)
287
+
288
+ return visualization_states, final_text
289
+
290
+ css = '''
291
+ .category-legend{display:none}
292
+ button{height: 60px}
293
+ '''
294
+ def create_chatbot_demo():
295
+ with gr.Blocks(css=css) as demo:
296
+ gr.Markdown("# LLaDA - Large Language Diffusion Model Demo")
297
+ gr.Markdown("[model](https://huggingface.co/GSAI-ML/LLaDA-8B-Instruct), [project page](https://ml-gsai.github.io/LLaDA-demo/)")
298
+
299
+ # STATE MANAGEMENT
300
+ chat_history = gr.State([])
301
+
302
+ # UI COMPONENTS
303
+ with gr.Row():
304
+ with gr.Column(scale=3):
305
+ chatbot_ui = gr.Chatbot(label="Conversation", height=500)
306
+
307
+ # Message input
308
+ with gr.Group():
309
+ with gr.Row():
310
+ user_input = gr.Textbox(
311
+ label="Your Message",
312
+ placeholder="Type your message here...",
313
+ show_label=False
314
+ )
315
+ send_btn = gr.Button("Send")
316
+
317
+ constraints_input = gr.Textbox(
318
+ label="Word Constraints",
319
+ info="This model allows for placing specific words at specific positions using the 'position:word' format. Example: making the 1st word 'Once', the 6th word 'upon' and the 11th word 'time' would be: '0:Once, 5:upon, 10:time'",
320
+ placeholder="0:Once, 5:upon, 10:time",
321
+ value=""
322
+ )
323
+ with gr.Column(scale=2):
324
+ output_vis = gr.HighlightedText(
325
+ label="Denoising Process Visualization",
326
+ combine_adjacent=False,
327
+ show_legend=True,
328
+ )
329
+
330
+ # Advanced generation settings
331
+ with gr.Accordion("Generation Settings", open=False):
332
+ with gr.Row():
333
+ gen_length = gr.Slider(
334
+ minimum=16, maximum=128, value=64, step=8,
335
+ label="Generation Length"
336
+ )
337
+ steps = gr.Slider(
338
+ minimum=8, maximum=64, value=64, step=4,
339
+ label="Denoising Steps"
340
+ )
341
+ with gr.Row():
342
+ temperature = gr.Slider(
343
+ minimum=0.0, maximum=1.0, value=0.5, step=0.1,
344
+ label="Temperature"
345
+ )
346
+ cfg_scale = gr.Slider(
347
+ minimum=0.0, maximum=2.0, value=0.0, step=0.1,
348
+ label="CFG Scale"
349
+ )
350
+ with gr.Row():
351
+ block_length = gr.Slider(
352
+ minimum=8, maximum=128, value=32, step=8,
353
+ label="Block Length"
354
+ )
355
+ remasking_strategy = gr.Radio(
356
+ choices=["low_confidence", "random"],
357
+ value="low_confidence",
358
+ label="Remasking Strategy"
359
+ )
360
+ with gr.Row():
361
+ visualization_delay = gr.Slider(
362
+ minimum=0.0, maximum=1.0, value=0.05, step=0.01,
363
+ label="Visualization Delay (seconds)"
364
+ )
365
+
366
+ # Current response text box (hidden)
367
+ current_response = gr.Textbox(
368
+ label="Current Response",
369
+ placeholder="The assistant's response will appear here...",
370
+ lines=3,
371
+ visible=False
372
+ )
373
+
374
+ # Clear button
375
+ clear_btn = gr.Button("Clear Conversation")
376
+
377
+ # HELPER FUNCTIONS
378
+ def add_message(history, message, response):
379
+ """Add a message pair to the history and return the updated history"""
380
+ history = history.copy()
381
+ history.append([message, response])
382
+ return history
383
+
384
+ def user_message_submitted(message, history, gen_length, steps, constraints, delay):
385
+ """Process a submitted user message"""
386
+ # Skip empty messages
387
+ if not message.strip():
388
+ # Return current state unchanged
389
+ history_for_display = history.copy()
390
+ return history, history_for_display, "", [], ""
391
+
392
+ # Add user message to history
393
+ history = add_message(history, message, None)
394
+
395
+ # Format for display - temporarily show user message with empty response
396
+ history_for_display = history.copy()
397
+
398
+ # Clear the input
399
+ message_out = ""
400
+
401
+ # Return immediately to update UI with user message
402
+ return history, history_for_display, message_out, [], ""
403
+
404
+ def bot_response(history, gen_length, steps, constraints, delay, temperature, cfg_scale, block_length, remasking):
405
+ """Generate bot response for the latest message"""
406
+ if not history:
407
+ return history, [], ""
408
+
409
+ # Get the last user message
410
+ last_user_message = history[-1][0]
411
+
412
+ try:
413
+ # Format all messages except the last one (which has no response yet)
414
+ messages = format_chat_history(history[:-1])
415
+
416
+ # Add the last user message
417
+ messages.append({"role": "user", "content": last_user_message})
418
+
419
+ # Parse constraints
420
+ parsed_constraints = parse_constraints(constraints)
421
+
422
+ # Generate response with visualization
423
+ vis_states, response_text = generate_response_with_visualization(
424
+ messages,
425
+ gen_length=gen_length,
426
+ steps=steps,
427
+ constraints=parsed_constraints,
428
+ temperature=temperature,
429
+ cfg_scale=cfg_scale,
430
+ block_length=block_length,
431
+ remasking=remasking
432
+ )
433
+
434
+ # Update history with the assistant's response
435
+ history[-1][1] = response_text
436
+
437
+ # Return the initial state immediately
438
+ yield history, vis_states[0], response_text
439
+
440
+ # Then animate through visualization states
441
+ for state in vis_states[1:]:
442
+ time.sleep(delay)
443
+ yield history, state, response_text
444
+
445
+ except Exception as e:
446
+ error_msg = f"Error: {str(e)}"
447
+ print(error_msg)
448
+
449
+ # Show error in visualization
450
+ error_vis = [(error_msg, "red")]
451
+
452
+ # Don't update history with error
453
+ yield history, error_vis, error_msg
454
+
455
+ def clear_conversation():
456
+ """Clear the conversation history"""
457
+ return [], [], "", []
458
+
459
+ # EVENT HANDLERS
460
+
461
+ # Clear button handler
462
+ clear_btn.click(
463
+ fn=clear_conversation,
464
+ inputs=[],
465
+ outputs=[chat_history, chatbot_ui, current_response, output_vis]
466
+ )
467
+
468
+ # User message submission flow (2-step process)
469
+ # Step 1: Add user message to history and update UI
470
+ msg_submit = user_input.submit(
471
+ fn=user_message_submitted,
472
+ inputs=[user_input, chat_history, gen_length, steps, constraints_input, visualization_delay],
473
+ outputs=[chat_history, chatbot_ui, user_input, output_vis, current_response]
474
+ )
475
+
476
+ # Also connect the send button
477
+ send_click = send_btn.click(
478
+ fn=user_message_submitted,
479
+ inputs=[user_input, chat_history, gen_length, steps, constraints_input, visualization_delay],
480
+ outputs=[chat_history, chatbot_ui, user_input, output_vis, current_response]
481
+ )
482
+
483
+ # Step 2: Generate bot response
484
+ # This happens after the user message is displayed
485
+ msg_submit.then(
486
+ fn=bot_response,
487
+ inputs=[
488
+ chat_history, gen_length, steps, constraints_input,
489
+ visualization_delay, temperature, cfg_scale, block_length,
490
+ remasking_strategy
491
+ ],
492
+ outputs=[chatbot_ui, output_vis, current_response]
493
+ )
494
+
495
+ send_click.then(
496
+ fn=bot_response,
497
+ inputs=[
498
+ chat_history, gen_length, steps, constraints_input,
499
+ visualization_delay, temperature, cfg_scale, block_length,
500
+ remasking_strategy
501
+ ],
502
+ outputs=[chatbot_ui, output_vis, current_response]
503
+ )
504
+
505
+ return demo
506
+
507
+ # Launch the demo
508
+ if __name__ == "__main__":
509
+ demo = create_chatbot_demo()
510
+ demo.queue().launch(share=True)
chat.py ADDED
@@ -0,0 +1,45 @@
1
+ import torch
2
+
3
+ from generate import generate
4
+ from transformers import AutoTokenizer, AutoModel
5
+
6
+
7
+ def chat():
8
+ device = 'cuda'
9
+ model = AutoModel.from_pretrained('GSAI-ML/LLaDA-8B-Instruct', trust_remote_code=True, torch_dtype=torch.bfloat16).to(device).eval()
10
+ tokenizer = AutoTokenizer.from_pretrained('GSAI-ML/LLaDA-8B-Instruct', trust_remote_code=True)
11
+
12
+ gen_length = 128
13
+ steps = 128
14
+ print('*' * 66)
15
+ print(f'** Answer Length: {gen_length} | Sampling Steps: {steps} **')
16
+ print('*' * 66)
17
+
18
+ conversation_num = 0
19
+ while True:
20
+ user_input = input("Enter your question: ")
21
+
22
+ m = [{"role": "user", "content": user_input}]
23
+ user_input = tokenizer.apply_chat_template(m, add_generation_prompt=True, tokenize=False)
24
+ input_ids = tokenizer(user_input)['input_ids']
25
+ input_ids = torch.tensor(input_ids).to(device).unsqueeze(0)
26
+
27
+ if conversation_num == 0:
28
+ prompt = input_ids
29
+ else:
30
+ prompt = torch.cat([prompt, input_ids[:, 1:]], dim=1)
31
+
32
+ out = generate(model, prompt, steps=steps, gen_length=gen_length, block_length=32, temperature=0., cfg_scale=0., remasking='low_confidence')
33
+
34
+ answer = tokenizer.batch_decode(out[:, prompt.shape[1]:], skip_special_tokens=True)[0]
35
+ print(f"Bot's reply: {answer}")
36
+
37
+ # remove the <EOS>
38
+ prompt = out[out != 126081].unsqueeze(0)
39
+ conversation_num += 1
40
+ print('-----------------------------------------------------------------------')
41
+
42
+
43
+ if __name__ == "__main__":
44
+ chat()
45
+
generate.py ADDED
@@ -0,0 +1,128 @@
1
+ import torch
2
+ import numpy as np
3
+ import torch.nn.functional as F
4
+
5
+ from transformers import AutoTokenizer, AutoModel
6
+
7
+
8
+ def add_gumbel_noise(logits, temperature):
9
+ '''
10
+ The Gumbel max is a method for sampling categorical distributions.
11
+ According to arXiv:2409.02908, for MDM, low-precision Gumbel Max improves perplexity score but reduces generation quality.
12
+ Thus, we use float64.
13
+ '''
14
+ logits = logits.to(torch.float64)
15
+ noise = torch.rand_like(logits, dtype=torch.float64)
16
+ gumbel_noise = (- torch.log(noise)) ** temperature
17
+ return logits.exp() / gumbel_noise
18
+
19
+
20
+ def get_num_transfer_tokens(mask_index, steps):
21
+ '''
22
+ In the reverse process, the interval [0, 1] is uniformly discretized into steps intervals.
23
+ Furthermore, because LLaDA employs a linear noise schedule (as defined in Eq. (8)),
24
+ the expected number of tokens transitioned at each step should be consistent.
25
+
26
+ This function is designed to precompute the number of tokens that need to be transitioned at each step.
27
+ '''
28
+ mask_num = mask_index.sum(dim=1, keepdim=True)
29
+
30
+ base = mask_num // steps
31
+ remainder = mask_num % steps
32
+
33
+ num_transfer_tokens = torch.zeros(mask_num.size(0), steps, device=mask_index.device, dtype=torch.int64) + base
34
+
35
+ for i in range(mask_num.size(0)):
36
+ num_transfer_tokens[i, :remainder[i]] += 1
37
+
38
+ return num_transfer_tokens
39
+
40
+
41
+ @ torch.no_grad()
42
+ def generate(model, prompt, steps=128, gen_length=128, block_length=128, temperature=0.,
43
+ cfg_scale=0., remasking='low_confidence', mask_id=126336):
44
+ '''
45
+ Args:
46
+ model: Mask predictor.
47
+ prompt: A tensor of shape (1, l).
48
+ steps: Sampling steps, less than or equal to gen_length.
49
+ gen_length: Generated answer length.
50
+ block_length: Block length, less than or equal to gen_length. If less than gen_length, semi-autoregressive remasking is used.
51
+ temperature: Categorical distribution sampling temperature.
52
+ cfg_scale: Unsupervised classifier-free guidance scale.
53
+ remasking: Remasking strategy. 'low_confidence' or 'random'.
54
+ mask_id: The token id of [MASK] is 126336.
55
+ '''
56
+ x = torch.full((1, prompt.shape[1] + gen_length), mask_id, dtype=torch.long).to(model.device)
57
+ x[:, :prompt.shape[1]] = prompt.clone()
58
+
59
+ prompt_index = (x != mask_id)
60
+
61
+ assert gen_length % block_length == 0
62
+ num_blocks = gen_length // block_length
63
+
64
+ assert steps % num_blocks == 0
65
+ steps = steps // num_blocks
66
+
67
+ for num_block in range(num_blocks):
68
+ block_mask_index = (x[:, prompt.shape[1] + num_block * block_length: prompt.shape[1] + (num_block + 1) * block_length] == mask_id)
69
+ num_transfer_tokens = get_num_transfer_tokens(block_mask_index, steps)
70
+ for i in range(steps):
71
+ mask_index = (x == mask_id)
72
+ if cfg_scale > 0.:
73
+ un_x = x.clone()
74
+ un_x[prompt_index] = mask_id
75
+ x_ = torch.cat([x, un_x], dim=0)
76
+ logits = model(x_).logits
77
+ logits, un_logits = torch.chunk(logits, 2, dim=0)
78
+ logits = un_logits + (cfg_scale + 1) * (logits - un_logits)
79
+ else:
80
+ logits = model(x).logits
81
+
82
+ logits_with_noise = add_gumbel_noise(logits, temperature=temperature)
83
+ x0 = torch.argmax(logits_with_noise, dim=-1) # b, l
84
+
85
+ if remasking == 'low_confidence':
86
+ p = F.softmax(logits.to(torch.float64), dim=-1)
87
+ x0_p = torch.squeeze(
88
+ torch.gather(p, dim=-1, index=torch.unsqueeze(x0, -1)), -1) # b, l
89
+ elif remasking == 'random':
90
+ x0_p = torch.rand((x0.shape[0], x0.shape[1]), device=x0.device)
91
+ else:
92
+ raise NotImplementedError(remasking)
93
+
94
+ x0_p[:, prompt.shape[1] + (num_block + 1) * block_length:] = -np.inf
95
+
96
+ x0 = torch.where(mask_index, x0, x)
97
+ confidence = torch.where(mask_index, x0_p, -np.inf)
98
+
99
+ transfer_index = torch.zeros_like(x0, dtype=torch.bool, device=x0.device)
100
+ for j in range(confidence.shape[0]):
101
+ _, select_index = torch.topk(confidence[j], k=num_transfer_tokens[j, i])
102
+ transfer_index[j, select_index] = True
103
+ x[transfer_index] = x0[transfer_index]
104
+
105
+ return x
106
+
107
+
108
+ def main():
109
+ device = 'cuda'
110
+
111
+ model = AutoModel.from_pretrained('GSAI-ML/LLaDA-8B-Instruct', trust_remote_code=True, torch_dtype=torch.bfloat16).to(device).eval()
112
+ tokenizer = AutoTokenizer.from_pretrained('GSAI-ML/LLaDA-8B-Instruct', trust_remote_code=True)
113
+
114
+ prompt = "Lily can run 12 kilometers per hour for 4 hours. After that, she runs 6 kilometers per hour. How many kilometers can she run in 8 hours?"
115
+
116
+ # Add special tokens for the Instruct model. The Base model does not require the following two lines.
117
+ m = [{"role": "user", "content": prompt}, ]
118
+ prompt = tokenizer.apply_chat_template(m, add_generation_prompt=True, tokenize=False)
119
+
120
+ input_ids = tokenizer(prompt)['input_ids']
121
+ input_ids = torch.tensor(input_ids).to(device).unsqueeze(0)
122
+
123
+ out = generate(model, input_ids, steps=128, gen_length=128, block_length=32, temperature=0., cfg_scale=0., remasking='low_confidence')
124
+ print(tokenizer.batch_decode(out[:, input_ids.shape[1]:], skip_special_tokens=True)[0])
125
+
126
+
127
+ if __name__ == '__main__':
128
+ main()
get_log_likelihood.py ADDED
@@ -0,0 +1,96 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+
4
+ from transformers import AutoTokenizer, AutoModel
5
+
6
+
7
+ def forward_process(batch, prompt_index, mask_id):
8
+ b, l = batch.shape
9
+
10
+ target_len = (l - prompt_index.sum()).item()
11
+ k = torch.randint(1, target_len + 1, (), device=batch.device)
12
+
13
+ x = torch.round(torch.linspace(float(k), k + (b - 1) * (target_len / b), steps=b, device=batch.device)).long()
14
+ x = ((x - 1) % target_len) + 1
15
+ assert x.min() >= 1 and x.max() <= target_len
16
+
17
+ indices = torch.arange(target_len, device=batch.device).repeat(b, 1)
18
+ is_mask = indices < x.unsqueeze(1)
19
+ for i in range(b):
20
+ is_mask[i] = is_mask[i][torch.randperm(target_len)]
21
+
22
+ is_mask = torch.cat((torch.zeros(b, prompt_index.sum(), dtype=torch.bool, device=batch.device), is_mask), dim=1)
23
+ noisy_batch = torch.where(is_mask, mask_id, batch)
24
+
25
+ # Return the masked batch and the mask ratio
26
+ return noisy_batch, (x / target_len).unsqueeze(1).repeat(1, l)
27
+
28
+
29
+ def get_logits(model, batch, prompt_index, cfg_scale, mask_id):
30
+ if cfg_scale > 0.:
31
+ assert len(prompt_index) == batch.shape[1]
32
+ prompt_index = prompt_index.unsqueeze(0).repeat(batch.shape[0], 1)
33
+ un_batch = batch.clone()
34
+ un_batch[prompt_index] = mask_id
35
+ batch = torch.cat([batch, un_batch])
36
+
37
+ input = batch
38
+ logits = model(input).logits
39
+
40
+ if cfg_scale > 0.:
41
+ logits, un_logits = torch.chunk(logits, 2, dim=0)
42
+ logits = un_logits + (cfg_scale + 1) * (logits - un_logits)
43
+ return logits
44
+
45
+
46
+ @ torch.no_grad()
47
+ def get_log_likelihood(model, prompt, answer, mc_num=128, batch_size=16, cfg_scale=0., mask_id=126336):
48
+ '''
49
+ Args:
50
+ model: Mask predictor.
51
+ prompt: A tensor of shape (l1).
52
+ answer: A tensor of shape (l2).
53
+ mc_num: Monte Carlo estimation times.
54
+ As detailed in Appendix B.5. Since MMLU, CMMLU, and C-EVAL only require the likelihood of a single token, a
55
+ single Monte Carlo estimate is sufficient for these benchmarks. For all other benchmarks, we find that 128
56
+ Monte Carlo samples are adequate to produce stable results.
57
+ batch_size: Mini batch size.
58
+ cfg_scale: Unsupervised classifier-free guidance scale.
59
+ mask_id: The toke id of [MASK] is 126336.
60
+ '''
61
+ seq = torch.concatenate([prompt, answer])[None, :]
62
+ seq = seq.repeat((batch_size, 1)).to(model.device)
63
+ prompt_index = torch.arange(seq.shape[1], device=model.device) < len(prompt)
64
+
65
+ loss_ = []
66
+ for _ in range(mc_num // batch_size):
67
+ perturbed_seq, p_mask = forward_process(seq, prompt_index, mask_id)
68
+ mask_index = perturbed_seq == mask_id
69
+
70
+ logits = get_logits(model, perturbed_seq, prompt_index, cfg_scale, mask_id)
71
+
72
+ loss = F.cross_entropy(logits[mask_index], seq[mask_index], reduction='none') / p_mask[mask_index]
73
+ loss = loss.sum() / batch_size
74
+
75
+ loss_.append(loss.item())
76
+
77
+ return - sum(loss_) / len(loss_)
78
+
79
+
80
+ def main():
81
+ device = 'cuda'
82
+
83
+ model = AutoModel.from_pretrained('GSAI-ML/LLaDA-8B-Base', trust_remote_code=True, torch_dtype=torch.bfloat16).to(device).eval()
84
+ tokenizer = AutoTokenizer.from_pretrained('GSAI-ML/LLaDA-8B-Base', trust_remote_code=True)
85
+
86
+ # this prompt and answer is from Hellaswag dataset
87
+ prompt = 'Roof shingle removal: A man is sitting on a roof. He'
88
+ answer = ' is using wrap to wrap a pair of skis.'
89
+
90
+ prompt = torch.tensor(tokenizer(prompt)['input_ids']).to(device)
91
+ answer = torch.tensor(tokenizer(answer)['input_ids']).to(device)
92
+ print(get_log_likelihood(model, prompt, answer, mc_num=128))
93
+
94
+
95
+ if __name__ == '__main__':
96
+ main()
imgs/LLaDA_vs_LLaMA.svg ADDED
imgs/LLaDA_vs_LLaMA_chat.svg ADDED
imgs/diff_remask.gif ADDED

Git LFS Details

  • SHA256: 0c97f2e338df118984e08456964abc5d0da2119e867066429c218c2a26f7dd3a
  • Pointer size: 132 Bytes
  • Size of remote file: 9.13 MB
imgs/sample.png ADDED

Git LFS Details

  • SHA256: 4e35901be05e2cf4bbde8fc79c32286cd127600b3b62763f240aae989dda12ca
  • Pointer size: 131 Bytes
  • Size of remote file: 298 kB
imgs/transformer1.png ADDED
imgs/transformer2.png ADDED

Git LFS Details

  • SHA256: 8b00226e8c9a653c8efdd0a858d1baf53f7a71817853e3c8d2e60b82b21e5b5c
  • Pointer size: 131 Bytes
  • Size of remote file: 175 kB
visualization/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # Visualization
2
+
3
+ This repository contains visualization tools for the LLaDA project.
4
+
5
+ ## Implementation Steps
6
+
7
+ ### Step 1: Generate Sampling Process
8
+ Run `generate.py` to produce your own sampling process records. A sample output (`sample_process.txt`) is included for reference. You have the option to:
9
+ - Utilize the provided `generate.py` script
10
+ - Modify both the prompt and generation parameters
11
+
12
+ ### Step 2: Generate Visualization HTML
13
+ Choose between two visualization styles:
14
+ - **Paper Style**:
15
+ `visualization_paper.py` produces visualizations matching the format in [our arXiv paper](https://arxiv.org/abs/2502.09992)
16
+ - **Zhihu Style**:
17
+ `visualization_zhihu.py` generates visualizations compatible with [Zhihu's format](https://zhuanlan.zhihu.com/p/24214732238)
18
+
19
+ The scripts will:
20
+ 1. Automatically create an `html/` directory
21
+ 2. Generate individual HTML files for each sampling step
22
+
23
+ *Note: The current implementation defaults to 64 sampling steps.*
24
+
25
+ ### Step 3: Create PNG Sequences
26
+ Convert generated HTML files to PNG format for GIF creation. These image sequences can be used with any standard GIF generator to visualize the complete sampling process.
27
+
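+ A minimal sketch using `html2image` (file names are illustrative; adjust them to the output of Step 2):
+
+ ```python
+ from html2image import Html2Image
+
+ hti = Html2Image(output_path='pngs')
+ # Convert each per-step HTML file produced in Step 2 into a PNG frame.
+ for step in range(64):
+     hti.screenshot(html_file=f'html/step_{step}.html', save_as=f'step_{step}.png')
+ ```
+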
28
+ ## Technical Notes
29
+ - Ensure Python 3.8+ environment
30
+ - Install required dependencies: `pip install html2image`
31
+ - For custom configurations, modify constants at the beginning of each script
visualization/generate.py ADDED
@@ -0,0 +1,144 @@
1
+ import torch
2
+ import numpy as np
3
+ import torch.nn.functional as F
4
+
5
+ from transformers import AutoTokenizer, AutoModel
6
+
7
+
8
+ def add_gumbel_noise(logits, temperature):
9
+ '''
10
+ The Gumbel max is a method for sampling categorical distributions.
11
+ According to arXiv:2409.02908, for MDM, low-precision Gumbel Max improves perplexity score but reduces generation quality.
12
+ Thus, we use float64.
13
+ '''
14
+ logits = logits.to(torch.float64)
15
+ noise = torch.rand_like(logits, dtype=torch.float64)
16
+ gumbel_noise = (- torch.log(noise)) ** temperature
17
+ return logits.exp() / gumbel_noise
18
+
19
+
20
+ def get_num_transfer_tokens(mask_index, steps):
21
+ '''
22
+ In the reverse process, the interval [0, 1] is uniformly discretized into steps intervals.
23
+ Furthermore, because LLaDA employs a linear noise schedule (as defined in Eq. (8)),
24
+ the expected number of tokens transitioned at each step should be consistent.
25
+
26
+ This function is designed to precompute the number of tokens that need to be transitioned at each step.
27
+ '''
28
+ mask_num = mask_index.sum(dim=1, keepdim=True)
29
+
30
+ base = mask_num // steps
31
+ remainder = mask_num % steps
32
+
33
+ num_transfer_tokens = torch.zeros(mask_num.size(0), steps, device=mask_index.device, dtype=torch.int64) + base
34
+
35
+ for i in range(mask_num.size(0)):
36
+ num_transfer_tokens[i, :remainder[i]] += 1
37
+
38
+ return num_transfer_tokens
39
+
40
+
41
+ @ torch.no_grad()
42
+ def generate(model, prompt, tokenizer, steps=128, gen_length=128, block_length=128, temperature=0.,
43
+ cfg_scale=0., remasking='low_confidence', mask_id=126336):
44
+ '''
45
+ Args:
46
+ model: Mask predictor.
47
+ prompt: A tensor of shape (1, l).
48
+ steps: Sampling steps, less than or equal to gen_length.
49
+ gen_length: Generated answer length.
50
+ block_length: Block length, less than or equal to gen_length. If less than gen_length, semi-autoregressive remasking is used.
51
+ temperature: Categorical distribution sampling temperature.
52
+ cfg_scale: Unsupervised classifier-free guidance scale.
53
+ remasking: Remasking strategy. 'low_confidence' or 'random'.
54
+ mask_id: The toke id of [MASK] is 126336.
55
+ '''
56
+ x = torch.full((1, prompt.shape[1] + gen_length), mask_id, dtype=torch.long).to(model.device)
57
+ x[:, :prompt.shape[1]] = prompt.clone()
58
+
59
+ prompt_index = (x != mask_id)
60
+
61
+ assert gen_length % block_length == 0
62
+ num_blocks = gen_length // block_length
63
+
64
+ assert steps % num_blocks == 0
65
+ steps = steps // num_blocks
66
+
67
+ print_i = 0
68
+
69
+ for num_block in range(num_blocks):
70
+ block_mask_index = (x[:, prompt.shape[1] + num_block * block_length: prompt.shape[1] + (num_block + 1) * block_length:] == mask_id)
71
+ num_transfer_tokens = get_num_transfer_tokens(block_mask_index, steps)
72
+ for i in range(steps):
73
+ mask_index = (x == mask_id)
74
+ if cfg_scale > 0.:
75
+ un_x = x.clone()
76
+ un_x[prompt_index] = mask_id
77
+ x_ = torch.cat([x, un_x], dim=0)
78
+ logits = model(x_).logits
79
+ logits, un_logits = torch.chunk(logits, 2, dim=0)
80
+ logits = un_logits + (cfg_scale + 1) * (logits - un_logits)
81
+ else:
82
+ logits = model(x).logits
83
+
84
+ logits_with_noise = add_gumbel_noise(logits, temperature=temperature)
85
+ x0 = torch.argmax(logits_with_noise, dim=-1) # b, l
86
+
87
+ if remasking == 'low_confidence':
88
+ p = F.softmax(logits.to(torch.float64), dim=-1)
89
+ x0_p = torch.squeeze(
90
+ torch.gather(p, dim=-1, index=torch.unsqueeze(x0, -1)), -1) # b, l
91
+ elif remasking == 'random':
92
+ x0_p = torch.rand((x0.shape[0], x0.shape[1]), device=x0.device)
93
+ else:
94
+ raise NotImplementedError(remasking)
95
+
96
+ x0_p[:, prompt.shape[1] + (num_block + 1) * block_length:] = -np.inf
97
+
98
+ x0 = torch.where(mask_index, x0, x)
99
+ confidence = torch.where(mask_index, x0_p, -np.inf)
100
+
101
+ transfer_index = torch.zeros_like(x0, dtype=torch.bool, device=x0.device)
102
+ for j in range(confidence.shape[0]):
103
+ _, select_index = torch.topk(confidence[j], k=num_transfer_tokens[j, i])
104
+ transfer_index[j, select_index] = True
105
+ x[transfer_index] = x0[transfer_index]
106
+
107
+ print_i = print_i + 1
108
+ # Get generated token sequence (assuming batch_size=1)
109
+ generated_token_ids = x[0, prompt.shape[1]:] # Take first sample by reducing dimension
110
+ formatted_output = []
111
+ for token_id in generated_token_ids:
112
+ # Decode single token and handle newlines
113
+ decoded_token = tokenizer.decode(token_id).replace("\n", " ").replace("<|eot_id|>", " ").replace("<|endoftext|>", " ")
114
+
115
+ # Add asterisk wrapping (preserve original space positions)
116
+ formatted_token = f"*{decoded_token}&"
117
+ formatted_output.append(formatted_token)
118
+ # Combine final output
119
+ final_output = "".join(formatted_output).strip()
120
+ print(f"{print_i}, {final_output}", file=open("sample_process.txt", "a"))
121
+
122
+ return x
123
+
124
+
125
+ def main():
126
+ device = 'cuda'
127
+
128
+ model = AutoModel.from_pretrained('GSAI-ML/LLaDA-8B-Instruct', trust_remote_code=True, torch_dtype=torch.bfloat16).to(device).eval()
129
+ tokenizer = AutoTokenizer.from_pretrained('GSAI-ML/LLaDA-8B-Instruct', trust_remote_code=True)
130
+
131
+ prompt = "Explain what artificial intelligence is."
132
+
133
+ # Add special tokens for the Instruct model. The Base model does not require the following two lines.
134
+ m = [{"role": "user", "content": prompt}, ]
135
+ prompt = tokenizer.apply_chat_template(m, add_generation_prompt=True, tokenize=False)
136
+
137
+ input_ids = tokenizer(prompt)['input_ids']
138
+ input_ids = torch.tensor(input_ids).to(device).unsqueeze(0)
139
+
140
+ out = generate(model, input_ids, tokenizer, steps=64, gen_length=64, block_length=64, temperature=0., cfg_scale=0., remasking='random')
141
+
142
+
143
+ if __name__ == '__main__':
144
+ main()
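
A quick sanity check of the Gumbel-max identity behind `add_gumbel_noise`: taking `argmax(exp(logits) / (-log u) ** T)` with `u ~ Uniform(0, 1)` is equivalent to sampling from `softmax(logits / T)`, and at `T = 0` the noise term is constant, so sampling degenerates to greedy argmax. A standalone sketch, not part of the committed file:

```python
import torch

torch.manual_seed(0)
logits = torch.tensor([2.0, 1.0, 0.5], dtype=torch.float64)
temperature = 1.0

# Empirical distribution of argmax(exp(logits) / (-log u) ** T).
counts = torch.zeros(3)
for _ in range(20000):
    u = torch.rand(3, dtype=torch.float64)
    counts[torch.argmax(logits.exp() / (-torch.log(u)) ** temperature)] += 1

print(counts / counts.sum())                       # empirical, ~[0.63, 0.23, 0.14]
print(torch.softmax(logits / temperature, dim=0))  # analytic reference
```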
visualization/html_to_png.py ADDED
@@ -0,0 +1,30 @@
+ from html2image import Html2Image
+ import os
+ 
+ # Define the visualization types to process
+ types = ['zhihu', 'paper']  # Add all types you need to process here
+ 
+ # Initialize the Html2Image object
+ hti = Html2Image()
+ hti.browser.use_new_headless = None  # Keep default settings
+ 
+ for type_txt in types:
+     # Ensure the png output directory exists
+     output_dir = os.path.join('png', f"sample_process_{type_txt}")
+     os.makedirs(output_dir, exist_ok=True)
+ 
+     # Set the output path for the current type
+     hti.output_path = output_dir
+ 
+     # Loop over the 64 sampling steps and screenshot each HTML file
+     for i in range(1, 65):
+         # Get the HTML file path
+         html_path = os.path.join('html', f"sample_process_{type_txt}", f'visualization_step_{i}.html')
+ 
+         # Generate and save the screenshot
+         hti.screenshot(
+             url=html_path,
+             save_as=f'visualization_step_{i}.png',
+             size=(1200, 500) if type_txt == 'zhihu' else (1200, 800)
+         )
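
The screenshot loop above hardcodes `range(1, 65)`, matching the 64-step default noted in the README. If you generate with a different number of steps, one small variant (a sketch under that assumption, not part of the committed script) derives the count from the record file, since `generate.py` writes exactly one line per step:

```python
# Count non-empty record lines in sample_process.txt (one per sampling step).
with open("sample_process.txt", encoding="utf-8") as f:
    num_steps = sum(1 for line in f if line.strip())

print(f"will screenshot steps 1..{num_steps}")
```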
visualization/sample_process.txt ADDED
@@ -0,0 +1,64 @@
+ 1, *<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&
+ 2, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&
+ 3, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&
+ 4, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&
+ 5, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&
+ 6, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&
+ 7, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&
+ 8, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&
+ 9, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&* is&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&
+ 10, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&* is&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 11, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&* is&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 12, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&* is&*<|mdm_mask|>&*<|mdm_mask|>&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 13, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&* is&*<|mdm_mask|>&*<|mdm_mask|>&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 14, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&* is&*<|mdm_mask|>&*<|mdm_mask|>&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 15, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&* is&*<|mdm_mask|>&*<|mdm_mask|>&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 16, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* reason&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&* is&*<|mdm_mask|>&*<|mdm_mask|>&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 17, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* reason&*,&* and&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&* is&*<|mdm_mask|>&*<|mdm_mask|>&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 18, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* reason&*,&* and&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 19, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* reason&*,&* and&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 20, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&* refers&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* reason&*,&* and&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 21, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&* refers&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*<|mdm_mask|>&* reason&*,&* and&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&*<|mdm_mask|>&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 22, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&* refers&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*<|mdm_mask|>&* reason&*,&* and&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 23, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&* refers&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*<|mdm_mask|>&* reason&*,&* and&*<|mdm_mask|>&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 24, *<|mdm_mask|>&*<|mdm_mask|>&* (&*<|mdm_mask|>&*<|mdm_mask|>&* refers&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*<|mdm_mask|>&* reason&*,&* and&* make&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 25, *<|mdm_mask|>&*<|mdm_mask|>&* (&*AI&*<|mdm_mask|>&* refers&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*<|mdm_mask|>&* reason&*,&* and&* make&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 26, *<|mdm_mask|>&*<|mdm_mask|>&* (&*AI&*<|mdm_mask|>&* refers&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* computer&*<|mdm_mask|>&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*<|mdm_mask|>&* reason&*,&* and&* make&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 27, *<|mdm_mask|>&*<|mdm_mask|>&* (&*AI&*<|mdm_mask|>&* refers&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* computer&*<|mdm_mask|>&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*<|mdm_mask|>&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 28, *<|mdm_mask|>&*<|mdm_mask|>&* (&*AI&*<|mdm_mask|>&* refers&* to&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* computer&* systems&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*<|mdm_mask|>&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 29, *<|mdm_mask|>&*<|mdm_mask|>&* (&*AI&*<|mdm_mask|>&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&*<|mdm_mask|>&* computer&* systems&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*<|mdm_mask|>&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 30, *<|mdm_mask|>&*<|mdm_mask|>&* (&*AI&*<|mdm_mask|>&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&*<|mdm_mask|>&* computer&* systems&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&*<|mdm_mask|>&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 31, *<|mdm_mask|>&*<|mdm_mask|>&* (&*AI&*<|mdm_mask|>&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&*<|mdm_mask|>&* computer&* systems&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 32, *<|mdm_mask|>&*<|mdm_mask|>&* (&*AI&*<|mdm_mask|>&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&*<|mdm_mask|>&* computer&* systems&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 33, *<|mdm_mask|>&*<|mdm_mask|>&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&*<|mdm_mask|>&* computer&* systems&* that&*<|mdm_mask|>&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 34, *<|mdm_mask|>&*<|mdm_mask|>&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&*<|mdm_mask|>&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 35, *<|mdm_mask|>&*<|mdm_mask|>&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 36, *<|mdm_mask|>&*<|mdm_mask|>&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 37, *Artificial&*<|mdm_mask|>&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&*<|mdm_mask|>&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 38, *Artificial&*<|mdm_mask|>&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&*<|mdm_mask|>&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 39, *Artificial&*<|mdm_mask|>&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 40, *Artificial&*<|mdm_mask|>&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 41, *Artificial&*<|mdm_mask|>&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*<|mdm_mask|>&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 42, *Artificial&*<|mdm_mask|>&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 43, *Artificial&*<|mdm_mask|>&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&* the&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 44, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&* the&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&*<|mdm_mask|>&*<|mdm_mask|>&
+ 45, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&* the&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 46, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&* the&*<|mdm_mask|>&*<|mdm_mask|>&* human&*<|mdm_mask|>&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&*<|mdm_mask|>&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 47, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&* the&*<|mdm_mask|>&*<|mdm_mask|>&* human&*<|mdm_mask|>&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&* and&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 48, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&*<|mdm_mask|>&* the&*<|mdm_mask|>&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&* and&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 49, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&*<|mdm_mask|>&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&*<|mdm_mask|>&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&* and&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 50, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&*<|mdm_mask|>&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&*<|mdm_mask|>&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&* perform&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&* and&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 51, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&*<|mdm_mask|>&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&* perform&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&* and&* visual&* perception&*,&*<|mdm_mask|>&*<|mdm_mask|>&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 52, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&*<|mdm_mask|>&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&* perform&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&* and&* visual&* perception&*,&*<|mdm_mask|>&*,&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 53, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&* need&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&* perform&* tasks&* such&*<|mdm_mask|>&*<|mdm_mask|>&* recognition&*,&* speech&* and&* visual&* perception&*,&*<|mdm_mask|>&*,&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 54, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&* need&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&* perform&* tasks&* such&*<|mdm_mask|>&* pattern&* recognition&*,&* speech&* and&* visual&* perception&*,&*<|mdm_mask|>&*,&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 55, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&*<|mdm_mask|>&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&* need&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&* perform&* tasks&* such&*<|mdm_mask|>&* pattern&* recognition&*,&* speech&* and&* visual&* perception&*,&* learning&*,&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 56, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&* the&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&* need&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&*<|mdm_mask|>&* perform&* tasks&* such&*<|mdm_mask|>&* pattern&* recognition&*,&* speech&* and&* visual&* perception&*,&* learning&*,&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 57, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&* the&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&* need&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&*<|mdm_mask|>&*<|mdm_mask|>&* in&* machines&* to&* perform&* tasks&* such&*<|mdm_mask|>&* pattern&* recognition&*,&* speech&* and&* visual&* perception&*,&* learning&*,&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 58, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&* the&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&* need&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&* human&*<|mdm_mask|>&* in&* machines&* to&* perform&* tasks&* such&*<|mdm_mask|>&* pattern&* recognition&*,&* speech&* and&* visual&* perception&*,&* learning&*,&*<|mdm_mask|>&* decision&*-making&*.&* &*<|mdm_mask|>&
+ 59, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&* the&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&* need&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&* human&*<|mdm_mask|>&* in&* machines&* to&* perform&* tasks&* such&*<|mdm_mask|>&* pattern&* recognition&*,&* speech&* and&* visual&* perception&*,&* learning&*,&*<|mdm_mask|>&* decision&*-making&*.&* &* &
+ 60, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&* the&* technology&*<|mdm_mask|>&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&* need&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&* human&*<|mdm_mask|>&* in&* machines&* to&* perform&* tasks&* such&* as&* pattern&* recognition&*,&* speech&* and&* visual&* perception&*,&* learning&*,&*<|mdm_mask|>&* decision&*-making&*.&* &* &
+ 61, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&* the&* technology&* used&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&* need&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&* human&*<|mdm_mask|>&* in&* machines&* to&* perform&* tasks&* such&* as&* pattern&* recognition&*,&* speech&* and&* visual&* perception&*,&* learning&*,&*<|mdm_mask|>&* decision&*-making&*.&* &* &
+ 62, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&* the&* technology&* used&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&* need&*<|mdm_mask|>&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&* human&* intelligence&* in&* machines&* to&* perform&* tasks&* such&* as&* pattern&* recognition&*,&* speech&* and&* visual&* perception&*,&* learning&*,&*<|mdm_mask|>&* decision&*-making&*.&* &* &
+ 63, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&* the&* technology&* used&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&* need&* for&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&* human&* intelligence&* in&* machines&* to&* perform&* tasks&* such&* as&* pattern&* recognition&*,&* speech&* and&* visual&* perception&*,&* learning&*,&*<|mdm_mask|>&* decision&*-making&*.&* &* &
+ 64, *Artificial&* intelligence&* (&*AI&*)&* refers&* to&* the&* technology&* used&* in&* computer&* systems&* that&* are&* programmed&* to&* perceive&*,&* learn&*,&* reason&*,&* and&* make&* decisions&*,&* without&* the&* need&* for&* human&* intelligence&*.&* It&* is&* the&* simulation&* of&* human&* intelligence&* in&* machines&* to&* perform&* tasks&* such&* as&* pattern&* recognition&*,&* speech&* and&* visual&* perception&*,&* learning&*,&* and&* decision&*-making&*.&* &* &
visualization/visualization_paper.py ADDED
@@ -0,0 +1,195 @@
+ import re
+ from typing import List, Dict
+ import os
+ 
+ 
+ def parse_generation_history(file_path: str) -> Dict[int, List[str]]:
+     """Parse the per-step records, handling math symbols and spaces correctly"""
+     history = {}
+     token_pattern = re.compile(r"\*([^&]*)&?")
+ 
+     with open(file_path, 'r', encoding='utf-8') as f:
+         for line in f:
+             line = line.strip()
+             if not line:
+                 continue
+ 
+             try:
+                 step_part, content_part = line.split(',', 1)
+                 step = int(step_part.strip())
+             except ValueError:
+                 continue
+ 
+             tokens = []
+             for match in token_pattern.finditer(content_part):
+                 raw_token = match.group(1).strip()
+ 
+                 if raw_token == "":
+                     tokens.append(" ")
+                 elif raw_token == "*":
+                     tokens.append("*")
+                 else:
+                     tokens.append(raw_token)
+ 
+             # Normalize every step to exactly 64 token positions
+             if len(tokens) > 64:
+                 print(f"Truncating extra tokens: Step {step} ({len(tokens)} tokens)")
+                 tokens = tokens[:64]
+             elif len(tokens) < 64:
+                 print(f"Padding missing tokens: Step {step} ({len(tokens)} tokens)")
+                 tokens += [" "] * (64 - len(tokens))
+ 
+             history[step] = tokens
+ 
+     return history
+ 
+ 
+ def track_token_positions(history: Dict[int, List[str]]) -> List[int]:
+     """Track the first generation step at which each position is unmasked"""
+     num_positions = 64
+     steps_to_unmask = [-1] * num_positions
+ 
+     for step in sorted(history.keys()):
+         tokens = history[step]
+         for idx in range(num_positions):
+             if idx >= len(tokens):
+                 continue
+ 
+             token = tokens[idx]
+             if steps_to_unmask[idx] == -1 and token != '<|mdm_mask|>':
+                 steps_to_unmask[idx] = step
+ 
+     return steps_to_unmask
+ 
+ 
+ def generate_background_color(step: int, max_step: int) -> str:
+     """Generate a gradient color: earlier steps map to lighter blues, later steps to darker ones"""
+     color_stops = [
+         (240, 248, 255), (209, 226, 241), (176, 202, 224), (143, 179, 207),
+         (110, 156, 191), (77, 133, 175), (44, 110, 159), (12, 55, 112)
+     ]
+ 
+     color_index = min(step * 6 // max_step, 6)
+     ratio = (step % 2) / 2
+ 
+     # Linearly interpolate between two neighboring color stops
+     start = color_stops[color_index]
+     end = color_stops[min(color_index + 1, 7)]
+ 
+     r = int(start[0] + (end[0] - start[0]) * ratio)
+     g = int(start[1] + (end[1] - start[1]) * ratio)
+     b = int(start[2] + (end[2] - start[2]) * ratio)
+ 
+     return f"#{r:02x}{g:02x}{b:02x}"
+ 
+ 
+ def generate_step_visualization(current_step: int, current_tokens: List[str],
+                                 token_steps: List[int], max_step: int) -> str:
+     """Generate the token visualization for a specific step"""
+     html = []
+ 
+     for idx, token in enumerate(current_tokens):
+         style = [
+             "color: #000000",
+             "padding: 6px 8px",
+             "margin: 3px",
+             "border-radius: 6px",
+             "display: inline-block",
+             "font-weight: 600",
+             "font-size: 16px",
+             "font-family: 'Segoe UI', sans-serif",
+             "box-shadow: 0 3px 6px rgba(12,55,112,0.15)",
+             "transition: all 0.2s ease",
+             "position: relative",
+             "width: 120px",
+             "min-width: 120px",
+             "text-align: center",
+             "white-space: nowrap",
+             "overflow: hidden",
+             "text-overflow: ellipsis",
+             "box-sizing: border-box"
+         ]
+ 
+         if token == '<|mdm_mask|>':
+             style.extend([
+                 "background: #f8fafc",
+                 "border: 2px solid #ffffff",
+                 "font-weight: 800",
+                 "text-transform: uppercase",
+                 "padding: 4px 6px"
+             ])
+             display_text = "Mask"
+         else:
+             bg_color = generate_background_color(token_steps[idx], max_step)
+             style.append(f"background-color: {bg_color}")
+             display_text = token if token != " " else "␣"
+ 
+         html.append(f'<span style="{"; ".join(style)}">{display_text}</span>')
+ 
+     return '\n'.join(html)
+ 
+ 
+ def main(target_step: int = 64):
+     """Main function supporting target step specification"""
+     file_path = "sample_process.txt"
+     final_step = 64
+ 
+     history = parse_generation_history(file_path)
+     if target_step not in history:
+         raise ValueError(f"Invalid target step: {target_step}")
+ 
+     token_steps = track_token_positions(history)
+     current_tokens = history[target_step]
+ 
+     html_content = generate_step_visualization(
+         target_step, current_tokens, token_steps, final_step
+     )
+ 
+     example_steps = [0, 16, 32, 48, 64]
+     example_colors = [generate_background_color(s, final_step) for s in example_steps]
+     legend_html = ''.join(
+         f'<div style="background-color: {color}; color: black;">Step {s}</div>'
+         for s, color in zip(example_steps, example_colors)
+     )
+ 
+     target_dir = "html/sample_process_paper"
+     os.makedirs(target_dir, exist_ok=True)
+ 
+     with open(f"{target_dir}/visualization_step_{target_step}.html", "w", encoding="utf-8") as f:
+         f.write(f"""<html>
+ <head>
+     <title>Step {target_step} Visualization</title>
+     <style>
+         body {{
+             padding: 40px;
+             background: #f8fafc;
+             font-family: 'Segoe UI', sans-serif;
+         }}
+         .legend {{
+             display: flex;
+             gap: 15px;
+             margin: 20px 0;
+         }}
+         .legend div {{
+             padding: 10px;
+             border-radius: 5px;
+             color: white;
+             min-width: 80px;
+             text-align: center;
+         }}
+         .token:hover {{
+             transform: translateY(-2px);
+         }}
+     </style>
+ </head>
+ <body>
+     <div style="max-width: 1000px; margin: auto;">
+         <h2>Generation Step {target_step}</h2>
+         <div>{html_content}</div>
+ 
+         <h3>Color Legend</h3>
+         <div class="legend">{legend_html}</div>
+     </div>
+ </body>
+ </html>""")
+ 
+ 
+ if __name__ == "__main__":
+     for step in range(1, 65):
+         main(target_step=step)
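
For reference, this is how the `*token&` records written by `generate.py` decompose under the parser's regex (a simplified standalone illustration; the committed `parse_generation_history` additionally special-cases literal `*` tokens and pads each step to 64 positions):

```python
import re

line = "3, *Artificial&*<|mdm_mask|>&* (&*AI&"
step_part, content = line.split(",", 1)
tokens = [m.group(1).strip() or " " for m in re.finditer(r"\*([^&]*)&?", content)]

print(int(step_part), tokens)  # 3 ['Artificial', '<|mdm_mask|>', '(', 'AI']
```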
visualization/visualization_zhihu.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import List, Dict
3
+ import os
4
+
5
+ def parse_generation_history(file_path: str) -> Dict[int, List[str]]:
6
+ """Improved parser that handles math symbols and spaces correctly"""
7
+ history = {}
8
+ token_pattern = re.compile(r"\*([^&]*)&?")
9
+
10
+ with open(file_path, 'r', encoding='utf-8') as f:
11
+ for line in f:
12
+ line = line.strip()
13
+ if not line:
14
+ continue
15
+
16
+ try:
17
+ step_part, content_part = line.split(',', 1)
18
+ step = int(step_part.strip())
19
+ except ValueError:
20
+ continue
21
+
22
+ tokens = []
23
+ for match in token_pattern.finditer(content_part):
24
+ raw_token = match.group(1).strip()
25
+
26
+ if raw_token == "":
27
+ tokens.append(" ")
28
+ elif raw_token == "*":
29
+ tokens.append("*")
30
+ else:
31
+ tokens.append(raw_token)
32
+
33
+ while len(tokens) < 64:
34
+ tokens.append(" ")
35
+
36
+ if len(tokens) > 64:
37
+ print(f"Truncating extra tokens: Step {step} ({len(tokens)} tokens)")
38
+ tokens = tokens[:64]
39
+ elif len(tokens) < 64:
40
+ print(f"Padding missing tokens: Step {step} ({len(tokens)} tokens)")
41
+ tokens += [" "] * (64 - len(tokens))
42
+
43
+ tokens = tokens[:62]
44
+
45
+ history[step] = tokens
46
+
47
+ return history
48
+
+ 
+ def track_token_positions(history: Dict[int, List[str]]) -> List[int]:
+     """Return, for each position, the first generation step at which it was unmasked."""
+     num_positions = 64
+     steps_to_unmask = [-1] * num_positions  # -1 means never unmasked
+ 
+     for step in sorted(history.keys()):
+         tokens = history[step]
+         for idx in range(num_positions):
+             if idx >= len(tokens):
+                 continue
+             # Record the first step at which this position stops being masked.
+             if steps_to_unmask[idx] == -1 and tokens[idx] != '<|mdm_mask|>':
+                 steps_to_unmask[idx] = step
+ 
+     return steps_to_unmask
+ 
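A tiny worked example of this convention (a hypothetical two-step history, not a real trace): positions unmasked at steps 1 and 2, and one position that stays masked throughout:

```python
# Hypothetical two-step history; not-yet-generated positions carry the
# <|mdm_mask|> token, and padding positions are plain spaces.
h = {
    1: ["Hi", "<|mdm_mask|>", "<|mdm_mask|>"] + [" "] * 61,
    2: ["Hi", "there", "<|mdm_mask|>"] + [" "] * 61,
}
assert track_token_positions(h)[:3] == [1, 2, -1]
```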
+ 
+ def generate_background_color(step: int, max_step: int) -> str:
+     """Map a generation step to a text color (darker gradient version)."""
+     # Color stops from light blue (early steps) to dark blue (late steps).
+     color_stops = [
+         (176, 202, 224),
+         (143, 179, 207),
+         (110, 156, 191),
+         (80, 130, 240),
+         (40, 90, 200),
+         (20, 70, 180),
+         (0, 50, 160),
+     ]
+ 
+     # Sub-linear (exponent 0.7) step-to-palette mapping, then a small
+     # interpolation offset that alternates between even and odd steps.
+     color_index = min(int(step ** 0.7 / max_step ** 0.7 * 6), 6)
+     ratio = (step % 2) / 2
+ 
+     # Linearly interpolate between the chosen stop and its neighbor.
+     start = color_stops[color_index]
+     end = color_stops[min(color_index + 1, 6)]
+     r = int(start[0] + (end[0] - start[0]) * ratio)
+     g = int(start[1] + (end[1] - start[1]) * ratio)
+     b = int(start[2] + (end[2] - start[2]) * ratio)
+ 
+     return f"#{r:02x}{g:02x}{b:02x}"
+ 
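A quick sanity check of the mapping, following directly from the arithmetic above: at step 16 of 64, `16**0.7 / 64**0.7 * 6` is roughly 2.27, so `color_index` is 2, and since `16 % 2 == 0` the interpolation ratio is 0 and the color is exactly `color_stops[2]`:

```python
# step 16 of 64: 16**0.7 / 64**0.7 * 6 ≈ 2.27 -> color_index 2;
# (16 % 2) / 2 == 0, so no interpolation: (110, 156, 191) -> "#6e9cbf"
assert generate_background_color(16, 64) == "#6e9cbf"
```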
+ 
+ def generate_step_visualization(current_step: int, current_tokens: List[str],
+                                 token_steps: List[int], max_step: int) -> str:
+     """Render one step as completely borderless inline token cells."""
+     # NOTE: current_step is accepted but not used in the rendering below.
+     html = []
+ 
+     for idx, token in enumerate(current_tokens):
+         # Fixed-width, borderless cells so tokens never shift between frames.
+         style = [
+             "padding: 6px 8px",
+             "margin: 2px",
+             "border-radius: 6px",
+             "display: inline-block",
+             "font-weight: 600",
+             "font-size: 16px",
+             "font-family: 'Segoe UI', sans-serif",
+             "transition: all 0.2s ease",
+             "width: 120px",
+             "min-width: 120px",
+             "text-align: center",
+             "white-space: nowrap",
+             "overflow: hidden",
+             "text-overflow: ellipsis",
+             "box-sizing: border-box",
+             "vertical-align: middle",
+             "border: 0 !important",
+             "outline: 0 !important",
+             "box-shadow: none !important",
+             "position: relative",
+             "z-index: 1",
+         ]
+ 
+         if token == '<|mdm_mask|>':
+             # Masked positions are rendered as invisible placeholders that
+             # still occupy their fixed-width cell.
+             style.extend([
+                 "color: transparent",
+                 "background: #f8fafc",
+                 "text-shadow: none",
+             ])
+             display_text = "&#8203;"  # zero-width space
+         else:
+             # The text color encodes when this token was first unmasked.
+             text_color = generate_background_color(token_steps[idx], max_step)
+             style.append(f"color: {text_color}")
+             display_text = token if token != " " else "␣"
+ 
+         html.append(f'''
+         <div style="display: inline-block; border: none !important; margin: 0 !important; padding: 0 !important;">
+             <span style="{"; ".join(style)}">{display_text}</span>
+         </div>
+         ''')
+ 
+     return '\n'.join(html)
+ 
+ 
+ def main(target_step: int = 64):
+     """Render the visualization page for a single target step."""
+     file_path = "sample_process.txt"
+     final_step = 64
+ 
+     history = parse_generation_history(file_path)
+     if target_step not in history:
+         raise ValueError(f"Invalid target step: {target_step}")
+ 
+     token_steps = track_token_positions(history)
+     current_tokens = history[target_step]
+ 
+     html_content = generate_step_visualization(
+         target_step, current_tokens, token_steps, final_step
+     )
+ 
+     # The legend is computed as in the paper variant, but this page layout
+     # omits it: only the token grid is written out below.
+     example_steps = [0, 16, 32, 48, 64]
+     example_colors = [generate_background_color(s, final_step) for s in example_steps]
+     legend_html = ''.join(
+         f'<div style="background-color: {color}; color: black;">Step {s}</div>'
+         for s, color in zip(example_steps, example_colors)
+     )
+ 
+     target_dir = "html/sample_process_zhihu"
+     if not os.path.exists(target_dir):
+         os.makedirs(target_dir)
+ 
+     with open(f"{target_dir}/visualization_step_{target_step}.html", "w", encoding="utf-8") as f:
+         f.write(f"""<html>
+ <head>
+     <title>Step {target_step} Visualization</title>
+     <style>
+         body {{
+             padding: 40px;
+             background: #f8fafc;
+             font-family: 'Segoe UI', sans-serif;
+         }}
+         .legend {{
+             display: flex;
+             gap: 15px;
+             margin: 20px 0;
+         }}
+         .legend div {{
+             padding: 10px;
+             border-radius: 5px;
+             color: white;
+             min-width: 80px;
+             text-align: center;
+         }}
+         .token:hover {{
+             transform: translateY(-2px);
+         }}
+     </style>
+ </head>
+ <body>
+     <div style="max-width: 1000px; margin: auto;">
+         <div>{html_content}</div>
+     </div>
+ </body>
+ </html>""")
+ 
+ 
+ if __name__ == "__main__":
+     # Render a page for every step of the 64-step generation process.
+     for step in range(1, 65):
+         main(target_step=step)
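To eyeball the generated pages locally, any static file server works; a standard-library-only sketch (the port is an arbitrary choice):

```python
# Serve the generated pages at http://localhost:8000/ (standard library only).
import functools
import http.server

handler = functools.partial(
    http.server.SimpleHTTPRequestHandler,
    directory="html/sample_process_zhihu",
)
http.server.ThreadingHTTPServer(("", 8000), handler).serve_forever()
```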