yiksiu committed on
Commit f118c1e · verified · 1 Parent(s): dbfb7f2

upload submission files

test_submission_ARC/.DS_Store ADDED
Binary file (10.2 kB).
 
test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:correct_symbol)_featurizer ADDED
Binary file (2.36 kB).
 
test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:correct_symbol)_indices ADDED
@@ -0,0 +1 @@
+ null
test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:correct_symbol)_inverse_featurizer ADDED
Binary file (2.11 kB).
 
test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:last_token)_featurizer ADDED
Binary file (2.27 kB).
 
test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:last_token)_indices ADDED
@@ -0,0 +1 @@
+ null
test_submission_ARC/ARC_easy_Gemma2ForCausalLM_answer_pointer/ResidualStream(Layer:0,Token:last_token)_inverse_featurizer ADDED
Binary file (2.09 kB).
 
test_submission_ARC/featurizer.py ADDED
@@ -0,0 +1,34 @@
+ """Test submission for ARC using identity featurizer."""
+
+ import torch
+ import torch.nn as nn
+ import pyvene as pv
+ from CausalAbstraction.neural.featurizers import Featurizer
+
+
+ class IdentityFeaturizerModule(torch.nn.Module):
+     def __init__(self):
+         super().__init__()
+         self.dummy_param = torch.nn.Parameter(torch.zeros(1), requires_grad=True)
+
+
+
+     def forward(self, x):
+         # return x, None
+         return x + 0 * self.dummy_param.sum(), None
+
+
+ class IdentityInverseFeaturizerModule(torch.nn.Module):
+     def __init__(self):
+         super().__init__()
+
+     def forward(self, f, error):
+         return f
+
+
+ class IdentityFeaturizer(Featurizer):
+     def __init__(self, id="identity"):
+         featurizer = IdentityFeaturizerModule()
+         inverse_featurizer = IdentityInverseFeaturizerModule()
+
+         super().__init__(featurizer, inverse_featurizer, id=id)
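For reference, the featurizer's forward pass returns a `(features, error)` pair, and the `0 * self.dummy_param.sum()` term is numerically a no-op that ties the output to a trainable parameter, keeping the module connected to the autograd graph. A minimal round-trip sketch using only the two committed modules (the tensor shape is illustrative, not prescribed by the submission):

```python
import torch

from featurizer import IdentityFeaturizerModule, IdentityInverseFeaturizerModule

module = IdentityFeaturizerModule()
inverse = IdentityInverseFeaturizerModule()

# Illustrative batch of residual-stream activations (batch, seq, hidden).
hidden = torch.randn(2, 5, 2304)

features, error = module(hidden)      # identity map plus a graph-connecting zero term
recovered = inverse(features, error)  # the inverse ignores the (None) error term

assert torch.allclose(recovered, hidden)
```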
test_submission_ARC/token_position.py ADDED
@@ -0,0 +1,64 @@
+ """
+ Token position definitions (copied from MCQA task)
+ """
+
+ import re
+ from CausalAbstraction.neural.LM_units import TokenPosition, get_last_token_index
+
+
+ def get_token_positions(pipeline, causal_model):
+     """
+     Get token positions for the simple MCQA task.
+
+     Args:
+         pipeline: The language model pipeline with tokenizer
+         causal_model: The causal model for the task
+
+     Returns:
+         list[TokenPosition]: List of TokenPosition objects for intervention experiments
+     """
+     def get_correct_symbol_index(input, pipeline, causal_model):
+         """
+         Find the index of the correct answer symbol in the prompt.
+
+         Args:
+             input (Dict): The input dictionary to a causal model
+             pipeline: The tokenizer pipeline
+             causal_model: The causal model
+
+         Returns:
+             list[int]: List containing the index of the correct answer symbol token
+         """
+         # Run the model to get the answer position
+         output = causal_model.run_forward(input)
+         pointer = output["answer_pointer"]
+         correct_symbol = output[f"symbol{pointer}"]
+         prompt = input["raw_input"]
+
+         # Find all single uppercase letters in the prompt
+         matches = list(re.finditer(r"\b[A-Z]\b", prompt))
+
+         # Find the match corresponding to our correct symbol
+         symbol_match = None
+         for match in matches:
+             if prompt[match.start():match.end()] == correct_symbol:
+                 symbol_match = match
+                 break
+
+         if not symbol_match:
+             raise ValueError(f"Could not find correct symbol {correct_symbol} in prompt: {prompt}")
+
+         # Get the substring up to the symbol match end
+         substring = prompt[:symbol_match.end()]
+         tokenized_substring = list(pipeline.load(substring)["input_ids"][0])
+
+         # The symbol token will be at the end of the substring
+         return [len(tokenized_substring) - 1]
+
+     # Create TokenPosition objects
+     token_positions = [
+         TokenPosition(lambda x: get_correct_symbol_index(x, pipeline, causal_model), pipeline, id="correct_symbol"),
+         TokenPosition(lambda x: [get_correct_symbol_index(x, pipeline, causal_model)[0]+1], pipeline, id="correct_symbol_period"),
+         TokenPosition(lambda x: get_last_token_index(x, pipeline), pipeline, id="last_token")
+     ]
+     return token_positions
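The helper locates the answer symbol in token space by tokenizing the prompt prefix that ends at the symbol and taking the last token's index; `correct_symbol_period` then assumes the period following the letter sits exactly one position later. A standalone sketch of that prefix-tokenization trick, using a Hugging Face tokenizer in place of the committed `pipeline.load` (the model name and example prompt are illustrative assumptions; any tokenizer works):

```python
import re

from transformers import AutoTokenizer

# gemma-2-2b matches the Gemma2ForCausalLM target above; any tokenizer works here.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b")

prompt = "Question: Which gas do plants absorb?\nA. oxygen\nB. carbon dioxide\nAnswer:"
correct_symbol = "B"

# First standalone occurrence of the correct letter, as in get_correct_symbol_index.
match = next(m for m in re.finditer(r"\b[A-Z]\b", prompt) if m.group() == correct_symbol)

# Tokenize the prefix ending at the symbol; the symbol is the prefix's last token.
prefix_ids = tokenizer(prompt[:match.end()])["input_ids"]
symbol_index = len(prefix_ids) - 1

print(symbol_index, repr(tokenizer.decode([prefix_ids[symbol_index]])))
```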