m. polinsky committed · fed8c31
1 Parent(s): 3195fcb
adding data gathering to digestor.py

digestor.py CHANGED (+39 −0)
@@ -210,3 +210,42 @@ class Digestor:
             digest.append(' '.join(each.summary_text))
 
         self.text = '\n\n'.join(digest)
+
+        # Create dict to write out digest data for analysis
+        out_data = {}
+        t = dt.now()
+        datetime_str = f"{t.hour:02}:{t.minute:02}:{t.second:02}"
+        choices_str = ', '.join(self.user_choices)
+        digest_str = '\n\t'.join(digest)
+
+
+        # This is a long comprehension to store all the fields and values in each summary:
+        # integer index: {
+        #     name_of_field: value, except for source,
+        #     which is unhashable so needs explicit handling
+        # }
+        summaries = {  # k is a summary namedtuple; i, p are each field's index and value
+            # The key c is the summary's position in self.summaries
+            c: {
+                # field name: value, unless the field is the source
+                k._fields[i]: p if k._fields[i] != 'source'
+                else
+                {
+                    'name': k.source.source_name,
+                    'source_url': k.source.source_url,
+                    'Summarization Checkpoint': k.source.source_summarization_checkpoint,
+                    'NER Checkpoint': k.source.source_ner_checkpoint,
+                } for i, p in enumerate(k)
+            } for c, k in enumerate(self.summaries)}
+
+        out_data['timestamp'] = datetime_str
+        out_data['article_count'] = len(self.summaries)
+        out_data['digest_length'] = len(digest_str.split(" "))
+        out_data['sum_params'] = {
+            'token_limit': self.token_limit,
+            'word_limit': self.word_limit,
+            'params': self.SUMMARIZATION_PARAMETERS,
+        }
+        out_data['summaries'] = summaries
+
+        return out_data
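
For reference, the comprehension above flattens each summary namedtuple into a plain dict keyed by its index, expanding the unhashable source object by hand. The following is a minimal, self-contained sketch of that structure; the Summary and Source namedtuples and their field values here are hypothetical stand-ins loosely inferred from the diff, not the project's actual definitions.

import json
from collections import namedtuple
from datetime import datetime as dt

# Hypothetical shapes standing in for the project's real summary objects.
Source = namedtuple('Source', ['source_name', 'source_url',
                               'source_summarization_checkpoint',
                               'source_ner_checkpoint'])
Summary = namedtuple('Summary', ['source', 'cluster_list', 'summary_text'])

summaries_in = [
    Summary(
        source=Source('Example Wire', 'https://example.com/feed',
                      'summarization-checkpoint-name', 'ner-checkpoint-name'),
        cluster_list=['example topic'],
        summary_text=['First sentence of the summary.', 'Second sentence.'],
    ),
]

# Same shape as the commit's comprehension: index -> {field: value},
# with the unhashable Source expanded into a plain dict.
out_summaries = {
    c: {
        k._fields[i]: p if k._fields[i] != 'source'
        else {
            'name': k.source.source_name,
            'source_url': k.source.source_url,
            'Summarization Checkpoint': k.source.source_summarization_checkpoint,
            'NER Checkpoint': k.source.source_ner_checkpoint,
        } for i, p in enumerate(k)
    } for c, k in enumerate(summaries_in)
}

t = dt.now()
out_data = {
    'timestamp': f"{t.hour:02}:{t.minute:02}:{t.second:02}",
    'article_count': len(summaries_in),
    'summaries': out_summaries,
}

# Everything here is plain dicts, lists, and strings, so it can be written
# straight out for analysis, e.g. as JSON.
print(json.dumps(out_data, indent=2))

Because the returned dict contains only plain dicts, lists, and strings, it serializes directly with json.dumps, which fits the "write out digest data for analysis" comment in the commit.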