upload submission files
Browse files- test_submission_ARC/.DS_Store +0 -0
- test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:correct_symbol)_featurizer +0 -0
- test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:correct_symbol)_indices +1 -0
- test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:correct_symbol)_inverse_featurizer +0 -0
- test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:last_token)_featurizer +0 -0
- test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:last_token)_indices +1 -0
- test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:last_token)_inverse_featurizer +0 -0
- test_submission_ARC/featurizer.py +34 -0
- test_submission_ARC/token_position.py +64 -0
test_submission_ARC/.DS_Store
ADDED
Binary file (10.2 kB). View file
|
|
test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:correct_symbol)_featurizer
ADDED
Binary file (2.36 kB). View file
|
|
test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:correct_symbol)_indices
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
null
|
test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:correct_symbol)_inverse_featurizer
ADDED
Binary file (2.11 kB). View file
|
|
test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:last_token)_featurizer
ADDED
Binary file (2.27 kB). View file
|
|
test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:last_token)_indices
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
null
|
test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:last_token)_inverse_featurizer
ADDED
Binary file (2.09 kB). View file
|
|
test_submission_ARC/featurizer.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Test submission for ARC using identity featurizer."""
|
2 |
+
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
import pyvene as pv
|
6 |
+
from CausalAbstraction.neural.featurizers import Featurizer
|
7 |
+
|
8 |
+
|
9 |
+
class IdentityFeaturizerModule(torch.nn.Module):
    """Identity featurizer: the forward pass returns its input unchanged.

    A single dummy trainable parameter is registered, presumably so that
    downstream training code sees a non-empty parameter list — TODO confirm
    against the Featurizer training loop. The forward pass adds
    ``0 * dummy_param.sum()`` so the parameter participates in the autograd
    graph while leaving the output numerically identical to the input.
    """

    def __init__(self):
        super().__init__()
        # Zero-initialized, trainable; only purpose is to exist as a parameter.
        self.dummy_param = torch.nn.Parameter(torch.zeros(1), requires_grad=True)

    def forward(self, x):
        """Return ``(x, None)``: the unchanged features and no error term.

        Args:
            x: Input activation tensor.

        Returns:
            tuple: ``(features, error)`` where ``features`` equals ``x`` and
            ``error`` is always ``None``.
        """
        # 0 * dummy_param.sum() keeps dummy_param in the computation graph
        # without changing the value of x.
        return x + 0 * self.dummy_param.sum(), None
|
19 |
+
|
20 |
+
|
21 |
+
class IdentityInverseFeaturizerModule(torch.nn.Module):
    """Inverse of the identity featurizer: returns the features unchanged.

    No ``__init__`` is defined — ``torch.nn.Module.__init__`` runs by
    default, and this module holds no state of its own.
    """

    def forward(self, f, error):
        """Return ``f`` unchanged.

        Args:
            f: Feature tensor produced by the featurizer.
            error: Residual/error term from the featurizer; accepted for
                interface compatibility but ignored.

        Returns:
            The tensor ``f``, untouched.
        """
        return f
|
27 |
+
|
28 |
+
|
29 |
+
class IdentityFeaturizer(Featurizer):
    """Featurizer that passes activations through completely unchanged."""

    def __init__(self, id="identity"):
        """Wire an identity forward/inverse module pair into the base class.

        Args:
            id: Identifier string passed through to ``Featurizer``.
        """
        super().__init__(
            IdentityFeaturizerModule(),
            IdentityInverseFeaturizerModule(),
            id=id,
        )
|
test_submission_ARC/token_position.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Token position definitions (copied from MCQA task)
|
3 |
+
"""
|
4 |
+
|
5 |
+
import re
|
6 |
+
from CausalAbstraction.neural.LM_units import TokenPosition, get_last_token_index
|
7 |
+
|
8 |
+
|
9 |
+
def get_token_positions(pipeline, causal_model):
    """
    Build TokenPosition objects for the simple MCQA task.

    Args:
        pipeline: The language model pipeline with tokenizer.
        causal_model: The causal model for the task.

    Returns:
        list[TokenPosition]: Positions for the correct answer symbol, the
        token immediately after it, and the last token of the prompt.
    """
    def get_correct_symbol_index(input, pipeline, causal_model):
        """
        Find the token index of the correct answer symbol in the prompt.

        Args:
            input (Dict): The input dictionary to a causal model.
            pipeline: The tokenizer pipeline.
            causal_model: The causal model.

        Returns:
            list[int]: List containing the index of the correct answer
            symbol token.

        Raises:
            ValueError: If the correct symbol does not appear as a
            standalone uppercase letter in the prompt.
        """
        # Run the causal model forward to learn which answer symbol is correct.
        output = causal_model.run_forward(input)
        pointer = output["answer_pointer"]
        correct_symbol = output[f"symbol{pointer}"]
        prompt = input["raw_input"]

        # First standalone uppercase letter in the prompt that equals the
        # correct symbol (answer options are single capital letters).
        symbol_match = next(
            (m for m in re.finditer(r"\b[A-Z]\b", prompt) if m.group(0) == correct_symbol),
            None,
        )
        if symbol_match is None:
            raise ValueError(f"Could not find correct symbol {correct_symbol} in prompt: {prompt}")

        # Tokenize the prompt up to and including the symbol; the symbol is
        # then the final token of that substring.
        tokenized_substring = list(pipeline.load(prompt[:symbol_match.end()])["input_ids"][0])
        return [len(tokenized_substring) - 1]

    # TokenPosition ids: the symbol itself, the token right after it
    # (presumably the "." following the option letter — confirm with the
    # prompt format), and the last prompt token.
    token_positions = [
        TokenPosition(lambda x: get_correct_symbol_index(x, pipeline, causal_model), pipeline, id="correct_symbol"),
        TokenPosition(lambda x: [get_correct_symbol_index(x, pipeline, causal_model)[0]+1], pipeline, id="correct_symbol_period"),
        TokenPosition(lambda x: get_last_token_index(x, pipeline), pipeline, id="last_token")
    ]
    return token_positions
|