|
--- |
|
license: eupl-1.1 |
|
language: |
|
- el |
|
library_name: fasttext, flair |
|
pipeline_tag: text-classification |
|
--- |
|
|
|
|
|
# ilsp/justice |
|
|
|
- Models for processing Greek court decisions: paragraph classification and named entity recognition for anonymization.
|
|
|
## Paragraph classification |
|
|
|
```python |
|
import random

from fasttext import load_model
from huggingface_hub import hf_hub_download

NL = "\n"

repo_id = "ilsp/justice"
model_path = hf_hub_download(repo_id=repo_id, filename="20250105-court_decisions_paragraph_classifier.ftz")
sample_decision = hf_hub_download(repo_id=repo_id, filename="sample_data/Α2485_2023.txt")  # anonymized decision
model = load_model(model_path)

# mapping between fastText labels and paragraph types (in both directions)
labels_map = {
    'preamble': '__label__0', '__label__0': 'preamble',
    'panel': '__label__1', '__label__1': 'panel',
    'litigants': '__label__2', '__label__2': 'litigants',
    'justification': '__label__3', '__label__3': 'justification',
    'decision': '__label__4', '__label__4': 'decision',
    'post': '__label__5', '__label__5': 'post'}

# read the decision and shuffle its paragraphs to show that classification is per paragraph
with open(sample_decision) as inf:
    paras = [p for p in inf.read().split(NL) if p.strip()]
random.shuffle(paras)
text = NL.join(paras)

# classify each paragraph and print a (truncated) preview together with its predicted type
nchars = 150
for line in text.split(NL):
    pred = labels_map[model.predict(line.strip())[0][0]]
    if len(line) > nchars:
        line = line[0:nchars]
    print(f"{line} -> {pred}")
|
``` |
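
The per-paragraph predictions can also be aggregated, for example to recover the overall structure of a shuffled decision. The snippet below is a minimal sketch that reuses `model`, `labels_map`, `text`, and `NL` from the example above; the grouping logic is only illustrative and is not part of the released models.

```python
from collections import defaultdict

# group paragraphs by their predicted section type (illustrative aggregation)
sections = defaultdict(list)
for para in text.split(NL):
    label = labels_map[model.predict(para.strip())[0][0]]
    sections[label].append(para)

# report how many paragraphs were assigned to each section type
for name in ('preamble', 'panel', 'litigants', 'justification', 'decision', 'post'):
    print(f"{name}: {len(sections[name])} paragraph(s)")
```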
|
|
|
## Named entity recognition for anonymization in court decisions |
|
```python |
|
from flair.data import Sentence, Token
from flair.models import SequenceTagger
from huggingface_hub import hf_hub_download

REPO_ID = "ilsp/justice"
MODEL_PATH = "decisions-ner-model.pt"

# download and load the sequence tagger
model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_PATH)
model = SequenceTagger.load(model_path)

text = "Για να δικάσει την από 30 Μαρτίου 2020 έφεση των 1) Νίκης Νικίδου του Νίκου , κατοίκου Νίκαιας ( Νεάπολης 1 ) , 2) Άννας Άννίδου του Άνθιμου , κατοίκου Αθήνας ( Αγράμπελης 1 ) και 3) Σοφίας Σοφίδου του Σοφοκλή , κατοίκου Στυλίδας ( Στρυμώνος 1 ) , οι οποίοι παρέστησαν με τον δικηγόρο Λυσία Λυσίου ( Α.Μ. 12341 ) , που τον διόρισαν με πληρεξούσιο ."

# build a pre-tokenized Sentence from whitespace-split tokens (or use a sentence splitter)
sentence = Sentence([Token(t) for t in text.split()])
model.predict(sentence)

# inspect the detected entity spans
sentence.get_spans("ner")
|
|
|
``` |
|
``` |
|
[Span[11:13]: "Νίκης Νικίδου" → PERSON (1.0000), |
|
Span[14:15]: "Νίκου" → PERSON (1.0000), |
|
Span[19:21]: "Νεάπολης 1" → FAC (1.0000), |
|
Span[24:26]: "Άννας Άννίδου" → PERSON (1.0000), |
|
Span[27:28]: "Άνθιμου" → PERSON (1.0000), |
|
Span[32:34]: "Αγράμπελης 1" → FAC (1.0000), |
|
Span[37:39]: "Σοφίας Σοφίδου" → PERSON (1.0000), |
|
Span[40:41]: "Σοφοκλή" → PERSON (1.0000), |
|
Span[45:47]: "Στρυμώνος 1" → FAC (1.0000)] |
|
``` |
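
Since the tagger is intended for anonymization, the predicted spans can be used to mask the corresponding tokens. The snippet below is a minimal sketch that reuses `sentence` from the example above and replaces every token inside a predicted span with a placeholder for its entity type; the masking strategy is only illustrative and is not part of the released model.

```python
# replace each token covered by a predicted span with a placeholder such as <PERSON> or <FAC>
tokens = [token.text for token in sentence]
for span in sentence.get_spans("ner"):
    label = span.get_label("ner").value
    for token in span.tokens:
        tokens[token.idx - 1] = f"<{label}>"  # Token.idx is 1-based

print(" ".join(tokens))
```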