jayasuriyaK committed
Commit e0c6f6e · verified · 1 parent: b10a9fb

Delete Upload

Upload/CustomModel/config.json DELETED
@@ -1,27 +0,0 @@
- {
-   "_name_or_path": "bert-base-uncased",
-   "architectures": [
-     "BertForSequenceClassification"
-   ],
-   "attention_probs_dropout_prob": 0.1,
-   "classifier_dropout": null,
-   "gradient_checkpointing": false,
-   "hidden_act": "gelu",
-   "hidden_dropout_prob": 0.1,
-   "hidden_size": 768,
-   "initializer_range": 0.02,
-   "intermediate_size": 3072,
-   "layer_norm_eps": 1e-12,
-   "max_position_embeddings": 512,
-   "model_type": "bert",
-   "num_attention_heads": 12,
-   "num_hidden_layers": 12,
-   "pad_token_id": 0,
-   "position_embedding_type": "absolute",
-   "problem_type": "single_label_classification",
-   "torch_dtype": "float32",
-   "transformers_version": "4.39.3",
-   "type_vocab_size": 2,
-   "use_cache": true,
-   "vocab_size": 30522
- }
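
For context, this config is what `from_pretrained` reads to rebuild the classifier. A minimal loading sketch, assuming the deleted `CustomModel/` directory held both this `config.json` and the `model.safetensors` weights below:

```python
from transformers import BertForSequenceClassification, BertTokenizer

# Rebuilds the BERT sequence classifier described by the config above;
# from_pretrained reads config.json and the weights from the directory.
model = BertForSequenceClassification.from_pretrained("CustomModel")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model.eval()  # inference mode, matching how the deleted app used it
```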
Upload/CustomModel/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:c0ae3b4736071ebf406209d00d51c502108761fafa3c8df37f6a009f0decb157
- size 437958648
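
The deleted weights file is stored via Git LFS, so the diff only records a pointer: the blob's sha256 and byte size, not its content. A minimal sketch for verifying a locally downloaded copy against that pointer (the digest and size are taken verbatim from the pointer above):

```python
import hashlib

def verify_lfs_object(path: str, expected_sha256: str, expected_size: int) -> bool:
    """Stream the file and compare its sha256 digest and size to the pointer."""
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_sha256 and size == expected_size

ok = verify_lfs_object(
    "model.safetensors",
    "c0ae3b4736071ebf406209d00d51c502108761fafa3c8df37f6a009f0decb157",
    437958648,
)
```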
Upload/CustomModel/training_args.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:62f276a3fac2555bc29c7da8ad3095096c7ee3452711ca0c0cab720c0e053210
- size 4920
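
`training_args.bin` is conventionally the pickled `TrainingArguments` object that `transformers.Trainer` saves alongside a checkpoint. A sketch for inspecting it, assuming that convention holds for this file:

```python
import torch

# Unpickle the saved TrainingArguments. weights_only=False is required on
# recent torch versions because this is an arbitrary Python object, not a
# tensor checkpoint. Only unpickle files from sources you trust.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```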
Upload/app.py DELETED
@@ -1,109 +0,0 @@
- # Run the app:
- # python -m streamlit run d:/NSFW/Project/test1.py
- import math
-
- import keras_ocr
- import streamlit as st
- import torch
- from transformers import BertTokenizer, BertForSequenceClassification
-
- # Initialize the OCR pipeline, tokenizer, and fine-tuned classifier
- pipeline = keras_ocr.pipeline.Pipeline()
- tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- model_2 = BertForSequenceClassification.from_pretrained("CustomModel")
- model_2.to('cpu')
-
- def get_distance(predictions):
-     """
-     Return a list of dictionaries, one per detection, with keys:
-     * text : detected text in image
-     * center_x : center of bounding box (x)
-     * center_y : center of bounding box (y)
-     * distance_from_origin : hypotenuse
-     * distance_y : distance between y and origin (0,0)
-     """
-     # Point of origin
-     x0, y0 = 0, 0
-
-     detections = []
-     for group in predictions:
-         # keras-ocr boxes are 4x2 arrays of corners ordered
-         # top-left, top-right, bottom-right, bottom-left
-         top_left_x, top_left_y = group[1][0]
-         bottom_right_x, bottom_right_y = group[1][2]
-         center_x = (top_left_x + bottom_right_x) / 2
-         center_y = (top_left_y + bottom_right_y) / 2
-
-         # Use the Pythagorean theorem to solve for distance from origin
-         distance_from_origin = math.dist([x0, y0], [center_x, center_y])
-
-         # Difference between y and origin, used to distinguish unique rows
-         distance_y = center_y - y0
-
-         # Append all results
-         detections.append({
-             'text': group[0],
-             'center_x': center_x,
-             'center_y': center_y,
-             'distance_from_origin': distance_from_origin,
-             'distance_y': distance_y
-         })
-
-     return detections
-
- def distinguish_rows(lst, thresh=15):
-     """Group detections whose vertical centers are within thresh pixels."""
-     sublists = []
-     for i in range(len(lst) - 1):
-         if lst[i + 1]['distance_y'] - lst[i]['distance_y'] <= thresh:
-             if lst[i] not in sublists:
-                 sublists.append(lst[i])
-             sublists.append(lst[i + 1])
-         else:
-             yield sublists
-             sublists = [lst[i + 1]]
-     yield sublists
-
- # Title of the app
- st.title("Image Input App")
-
- # File uploader widget
- uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
-
- if uploaded_file is not None:
-     # Read in image
-     read_image = keras_ocr.tools.read(uploaded_file)
-
-     # prediction_groups is a list of (word, box) tuples per input image
-     prediction_groups = pipeline.recognize([read_image])
-     predictions = prediction_groups[0]  # detections for the single image
-     predictions = get_distance(predictions)
-
-     # Set thresh higher for text that is further apart
-     predictions = list(distinguish_rows(predictions, thresh=10))
-
-     # Remove all empty rows
-     predictions = list(filter(lambda x: x != [], predictions))
-
-     # Order text detections in human-readable format
-     ordered_preds = []
-     for row in predictions:
-         row = sorted(row, key=lambda x: x['distance_from_origin'])
-         for each in row:
-             ordered_preds.append(each['text'])
-
-     # Join detections into a sentence and classify it
-     sentence = ' '.join(ordered_preds)
-     text = sentence
-     print(text)
-     inputs = tokenizer(text, padding=True, truncation=True, return_tensors='pt').to('cpu')
-     outputs = model_2(**inputs)
-     predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
-     predictions = predictions.cpu().detach().numpy()
-     print(predictions[0][0], predictions[0][1])
-     if predictions[0][0] > predictions[0][1]:
-         print('safe')
-         st.write('safe')
-     else:
-         print('Not safe')
-         st.write('Not safe')
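
The row-grouping logic in the deleted app is easy to sanity-check without running OCR. A minimal sketch, assuming `distinguish_rows` from `app.py` above is in scope and using hand-made detections in place of `get_distance` output:

```python
# Two words on one visual line (close distance_y) and one on a second line.
detections = [
    {'text': 'hello', 'distance_from_origin': 12.0, 'distance_y': 12.0},
    {'text': 'world', 'distance_from_origin': 40.0, 'distance_y': 14.0},
    {'text': 'bye',   'distance_from_origin': 61.0, 'distance_y': 60.0},
]

rows = [row for row in distinguish_rows(detections, thresh=15) if row]
for row in rows:
    row = sorted(row, key=lambda d: d['distance_from_origin'])
    print(' '.join(d['text'] for d in row))
# Prints "hello world" then "bye"
```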
Upload/requirements.txt DELETED
@@ -1,4 +0,0 @@
- torch
- transformers
- keras_ocr
- streamlit