Dongfu Jiang
committed on
Commit
·
096ad41
1
Parent(s):
b9ac13b
Update README.md
Browse files
README.md
CHANGED
@@ -84,6 +84,62 @@ print(comparison_results)
|
|
84 |
**We still recommend using the llm-blender wrapper to use the PairRM, as many useful application functions have been implemented to support various scenarios, such as rank, and conversation comparisons, best-of-n-sampling, etc.**
|
85 |
|
86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
# Pairwise Reward Model for LLMs (PairRM) from LLM-Blender
|
88 |
|
89 |
|
|
|
84 |
**We still recommend using the llm-blender wrapper to use the PairRM, as many useful application functions have been implemented to support various scenarios, such as rank, and conversation comparisons, best-of-n-sampling, etc.**
|
85 |
|
86 |
|
87 |
+
You can also easily compare two conversations like the followings:
|
88 |
+
```python
|
89 |
+
def tokenize_conv_pair(convAs: List[str], convBs: List[str]):
|
90 |
+
"""Compare two conversations by taking USER turns as inputs and ASSISTANT turns as candidates
|
91 |
+
Multi-turn conversations comparison is also supported.
|
92 |
+
a conversation format is:
|
93 |
+
```python
|
94 |
+
[
|
95 |
+
{
|
96 |
+
"content": "hello",
|
97 |
+
"role": "USER"
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"content": "hi",
|
101 |
+
"role": "ASSISTANT"
|
102 |
+
},
|
103 |
+
...
|
104 |
+
]
|
105 |
+
```
|
106 |
+
Args:
|
107 |
+
convAs (List[List[dict]]): List of conversations
|
108 |
+
convBs (List[List[dict]]): List of conversations
|
109 |
+
"""
|
110 |
+
|
111 |
+
for c in convAs + convBs:
|
112 |
+
assert len(c) % 2 == 0, "Each conversation must have even number of turns"
|
113 |
+
assert all([c[i]['role'] == 'USER' for i in range(0, len(c), 2)]), "Each even turn must be USER"
|
114 |
+
assert all([c[i]['role'] == 'ASSISTANT' for i in range(1, len(c), 2)]), "Each odd turn must be ASSISTANT"
|
115 |
+
# check conversations correctness
|
116 |
+
assert len(convAs) == len(convBs), "Number of conversations must be the same"
|
117 |
+
for c_a, c_b in zip(convAs, convBs):
|
118 |
+
assert len(c_a) == len(c_b), "Number of turns in each conversation must be the same"
|
119 |
+
assert all([c_a[i]['content'] == c_b[i]['content'] for i in range(0, len(c_a), 2)]), "USER turns must be the same"
|
120 |
+
|
121 |
+
instructions = ["Finish the following coversation in each i-th turn by filling in <Response i> with your response."] * len(convAs)
|
122 |
+
inputs = [
|
123 |
+
"\n".join([
|
124 |
+
"USER: " + x[i]['content'] +
|
125 |
+
f"\nAssistant: <Response {i//2+1}>" for i in range(0, len(x), 2)
|
126 |
+
]) for x in convAs
|
127 |
+
]
|
128 |
+
cand1_texts = [
|
129 |
+
"\n".join([
|
130 |
+
f"<Response {i//2+1}>: " + x[i]['content'] for i in range(1, len(x), 2)
|
131 |
+
]) for x in convAs
|
132 |
+
]
|
133 |
+
cand2_texts = [
|
134 |
+
"\n".join([
|
135 |
+
f"<Response {i//2+1}>: " + x[i]['content'] for i in range(1, len(x), 2)
|
136 |
+
]) for x in convBs
|
137 |
+
]
|
138 |
+
inputs = [inst + inp for inst, inp in zip(instructions, inputs)]
|
139 |
+
encodings = tokenize_pair(inputs, cand1_texts, cand2_texts)
|
140 |
+
return encodings
|
141 |
+
```
|
142 |
+
|
143 |
# Pairwise Reward Model for LLMs (PairRM) from LLM-Blender
|
144 |
|
145 |
|