Joash committed on
Commit
7819eb3
·
1 Parent(s): be43bdd

Add code review assistant

Browse files
Files changed (3) hide show
  1. README.md +51 -10
  2. app.py +423 -0
  3. requirements.txt +28 -0
README.md CHANGED
@@ -1,13 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
- title: Code Review Assistant V3
3
- emoji: 📚
4
- colorFrom: indigo
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.9.0
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # Code Review Assistant V3
2
+
3
+ This Space provides an automated code review system powered by Gemma-2b-it. It analyzes code and provides suggestions for improvements in multiple categories including issues, improvements, best practices, and security considerations.
4
+
5
+ ## Features
6
+
7
+ - 🔍 Automated code review for multiple programming languages
8
+ - 💡 Detailed suggestions for code improvements
9
+ - 🔒 Security considerations and best practices
10
+ - 📊 Review history and performance metrics
11
+ - ⚡ GPU-accelerated inference
12
+ - 🎨 Clean and intuitive interface
13
+
14
+ ## Supported Languages
15
+
16
+ - Python
17
+ - JavaScript
18
+ - Java
19
+ - C++
20
+ - TypeScript
21
+ - Go
22
+ - Rust
23
+
24
+ ## Usage
25
+
26
+ 1. Select the programming language from the dropdown
27
+ 2. Paste your code in the input box
28
+ 3. Click "Submit for Review"
29
+ 4. View the detailed review suggestions
30
+ 5. Check the History tab to see previous reviews
31
+ 6. Monitor performance in the Metrics tab
32
+
33
+ ## Technical Details
34
+
35
+ - Model: google/gemma-2b-it
36
+ - Framework: Gradio
37
+ - Inference: GPU-accelerated with PyTorch
38
+ - Persistent storage for review history and metrics
39
+
40
+ ## Environment Setup
41
+
42
+ The Space requires the following environment variables:
43
+ - `HUGGING_FACE_TOKEN`: Your Hugging Face token for model access
44
+ - `MODEL_NAME`: Defaults to "google/gemma-2b-it"
45
+
46
+ ## License
47
+
48
+ MIT License
49
+
50
  ---
51
+
52
+ sdk_version: 4.19.1
 
 
 
 
53
  app_file: app.py
54
  pinned: false
 
 
 
 
app.py ADDED
@@ -0,0 +1,423 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+ from huggingface_hub import login
5
+ import os
6
+ import logging
7
+ from datetime import datetime
8
+ import json
9
+ from typing import List, Dict
10
+ import warnings
11
+ import spaces
12
+
13
+ # Filter out warnings
14
+ warnings.filterwarnings('ignore')
15
+
16
+ # Configure logging
17
+ logging.basicConfig(level=logging.INFO)
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # Environment variables
21
+ HF_TOKEN = os.getenv("HUGGING_FACE_TOKEN")
22
+ MODEL_NAME = os.getenv("MODEL_NAME", "google/gemma-2b-it")
23
+
24
+ # Create data directory for persistence
25
+ DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
26
+ os.makedirs(DATA_DIR, exist_ok=True)
27
+
28
+ # History file
29
+ HISTORY_FILE = os.path.join(DATA_DIR, "review_history.json")
30
+
31
class Review:
    """A single code review: the submitted code plus the model's suggestions.

    Attributes:
        code: The source code that was reviewed.
        language: The language name the code was submitted under.
        suggestions: The review text produced for the code.
        timestamp: ISO-8601 string taken from ``datetime.now()`` at creation.
        response_time: Seconds spent producing the review; 0.0 until the
            caller sets it.
    """

    def __init__(self, code: str, language: str, suggestions: str):
        self.code = code
        self.language = language
        self.suggestions = suggestions
        self.timestamp = datetime.now().isoformat()
        self.response_time = 0.0

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(language={self.language!r}, "
            f"timestamp={self.timestamp!r}, response_time={self.response_time!r})"
        )

    def to_dict(self):
        """Return a JSON-serializable dict describing this review."""
        return {
            'timestamp': self.timestamp,
            'language': self.language,
            'code': self.code,
            'suggestions': self.suggestions,
            'response_time': self.response_time
        }

    @classmethod
    def from_dict(cls, data):
        """Rebuild a review from a dict shaped like the output of ``to_dict``.

        ``code``, ``language`` and ``suggestions`` are required and raise
        ``KeyError`` when absent. ``timestamp`` and ``response_time`` are
        optional: a record without them keeps the values assigned by
        ``__init__`` (creation time and 0.0) instead of failing, so one
        incomplete record does not make the whole record list unloadable.
        """
        review = cls(data['code'], data['language'], data['suggestions'])
        review.timestamp = data.get('timestamp', review.timestamp)
        review.response_time = data.get('response_time', review.response_time)
        return review
54
+
55
class CodeReviewer:
    """Review source code with a causal language model and track the results.

    The model and tokenizer are loaded lazily (see ``initialize_model``).
    Completed reviews are kept in ``review_history`` and written, together
    with aggregate ``metrics``, to ``HISTORY_FILE`` as JSON.
    """

    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.device = None
        self.review_history: List[Review] = []
        self.metrics = self._default_metrics()
        self._initialized = False
        self.load_history()

    @staticmethod
    def _default_metrics() -> Dict:
        """Return a fresh metrics dict with every counter at zero."""
        return {
            'total_reviews': 0,
            'avg_response_time': 0.0,
            'reviews_today': 0
        }

    def _count_reviews_today(self) -> int:
        """Count the reviews in history whose timestamp falls on today's date."""
        today = datetime.now().date()
        return sum(
            1 for r in self.review_history
            if datetime.fromisoformat(r.timestamp).date() == today
        )

    def load_history(self):
        """Load review history and metrics from ``HISTORY_FILE``.

        A missing file leaves the current (empty) state untouched. Any error
        while reading or parsing is logged and resets history and metrics to
        empty defaults.
        """
        try:
            if os.path.exists(HISTORY_FILE):
                with open(HISTORY_FILE, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                self.review_history = [Review.from_dict(r) for r in data['history']]
                # Fill in any counter the stored file lacks.
                self.metrics = {**self._default_metrics(), **data.get('metrics', {})}
                # The stored value was computed on the day of the last save;
                # recompute it so it is correct after a restart on a later day.
                self.metrics['reviews_today'] = self._count_reviews_today()
                logger.info(f"Loaded {len(self.review_history)} reviews from history")
        except Exception as e:
            logger.error(f"Error loading history: {e}")
            # Initialize empty history if file doesn't exist or is corrupted
            self.review_history = []
            self.metrics = self._default_metrics()

    def save_history(self):
        """Write review history and metrics to ``HISTORY_FILE``.

        Errors are logged, not raised: persistence is best-effort and must
        not fail a review that has already been produced.
        """
        try:
            data = {
                'history': [r.to_dict() for r in self.review_history],
                'metrics': self.metrics
            }
            # Ensure the directory exists
            os.makedirs(os.path.dirname(HISTORY_FILE), exist_ok=True)
            # Write to a sibling temp file and swap it in, so an interrupted
            # write cannot leave a half-written history file behind.
            tmp_path = HISTORY_FILE + ".tmp"
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(data, f)
            os.replace(tmp_path, HISTORY_FILE)
            logger.info("Saved review history")
        except Exception as e:
            logger.error(f"Error saving history: {e}")

    @spaces.GPU
    def ensure_initialized(self):
        """Load the model if it has not been loaded yet.

        ``initialize_model`` records success or failure in
        ``self._initialized`` itself, so the flag is not forced to True here:
        a failed load stays "not initialized" and is retried on the next call.
        """
        if not self._initialized:
            self.initialize_model()

    def initialize_model(self):
        """Load the tokenizer and model named by ``MODEL_NAME``.

        Returns:
            True when both loaded successfully, False otherwise (the error
            is logged). ``self._initialized`` is set to the same value.
        """
        try:
            if HF_TOKEN:
                login(token=HF_TOKEN, add_to_git_credential=False)

            logger.info("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(
                MODEL_NAME,
                token=HF_TOKEN,
                trust_remote_code=True
            )
            # Keep the BOS/EOS tokens that ship with the checkpoint. Only a
            # missing pad token is supplied, preferring to reuse EOS so the
            # vocabulary (and the embedding matrix) does not have to grow.
            num_added = 0
            if self.tokenizer.pad_token is None:
                if self.tokenizer.eos_token is not None:
                    self.tokenizer.pad_token = self.tokenizer.eos_token
                else:
                    num_added = self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
            logger.info(f"Added {num_added} special tokens")
            logger.info("Tokenizer loaded successfully")

            logger.info("Loading model...")
            self.model = AutoModelForCausalLM.from_pretrained(
                MODEL_NAME,
                device_map="auto",
                torch_dtype=torch.float16,
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                token=HF_TOKEN
            )
            if num_added > 0:
                logger.info("Resizing model embeddings for special tokens")
                self.model.resize_token_embeddings(len(self.tokenizer))

            self.device = next(self.model.parameters()).device
            logger.info(f"Model loaded successfully on {self.device}")
            self._initialized = True
            return True
        except Exception as e:
            logger.error(f"Error initializing model: {e}")
            self._initialized = False
            return False

    def create_review_prompt(self, code: str, language: str) -> str:
        """Create a structured prompt for code review."""
        return f"""Review this {language} code. List specific points in these sections:
Issues:
Improvements:
Best Practices:
Security:

Code:
```{language}
{code}
```"""

    @spaces.GPU
    def review_code(self, code: str, language: str) -> str:
        """Generate a review for ``code`` and record it in history.

        Args:
            code: Source code to review.
            language: Language name inserted into the prompt.

        Returns:
            The generated suggestions, or a string starting with ``"Error"``
            when initialization, tokenization, generation or decoding fails.
            Failures are logged and never raised to the caller.
        """
        try:
            if not self._initialized and not self.initialize_model():
                return "Error: Model initialization failed. Please try again later."

            start_time = datetime.now()
            prompt = self.create_review_prompt(code, language)

            try:
                inputs = self.tokenizer(
                    prompt,
                    return_tensors="pt",
                    truncation=True,
                    max_length=512,
                    padding=True
                ).to(self.device)
            except Exception as token_error:
                logger.error(f"Tokenization error: {token_error}")
                return "Error: Failed to process input code. Please try again."

            try:
                with torch.no_grad():
                    # early_stopping is a beam-search option; it is omitted
                    # because generation here samples with a single beam.
                    outputs = self.model.generate(
                        **inputs,
                        max_new_tokens=512,
                        do_sample=True,
                        temperature=0.7,
                        top_p=0.95,
                        pad_token_id=self.tokenizer.pad_token_id,
                        eos_token_id=self.tokenizer.eos_token_id
                    )
            except Exception as gen_error:
                logger.error(f"Generation error: {gen_error}")
                return "Error: Failed to generate review. Please try again."

            try:
                # Decode only the tokens produced after the prompt. Slicing the
                # decoded text by len(prompt) is wrong whenever the prompt was
                # truncated to max_length or does not round-trip exactly
                # through the tokenizer.
                prompt_token_count = inputs["input_ids"].shape[1]
                suggestions = self.tokenizer.decode(
                    outputs[0][prompt_token_count:],
                    skip_special_tokens=True
                ).strip()
            except Exception as decode_error:
                logger.error(f"Decoding error: {decode_error}")
                return "Error: Failed to decode model output. Please try again."

            # Create the review and time it
            review = Review(code, language, suggestions)
            review.response_time = (datetime.now() - start_time).total_seconds()

            # The review must be in history before metrics are updated,
            # because update_metrics counts today's reviews from history.
            self.review_history.append(review)
            self.update_metrics(review)

            # Save to file
            self.save_history()

            if self.device and self.device.type == "cuda":
                del inputs, outputs
                torch.cuda.empty_cache()

            return suggestions

        except Exception as e:
            logger.error(f"Error during code review: {e}")
            return f"Error performing code review: {str(e)}"

    def update_metrics(self, review: Review):
        """Fold ``review`` into the aggregate metrics.

        Increments the total, updates the running mean response time, and
        recounts today's reviews from ``review_history`` (so ``review``
        should already have been appended to it).
        """
        self.metrics['total_reviews'] += 1

        # Running mean: rebuild the previous total time, add the new sample.
        total_time = self.metrics['avg_response_time'] * (self.metrics['total_reviews'] - 1)
        total_time += review.response_time
        self.metrics['avg_response_time'] = total_time / self.metrics['total_reviews']

        self.metrics['reviews_today'] = self._count_reviews_today()

    def get_history(self) -> List[Dict]:
        """Return the ten most recent reviews, newest first, as display dicts."""
        return [
            {
                'timestamp': r.timestamp,
                'language': r.language,
                'code': r.code,
                'suggestions': r.suggestions,
                'response_time': f"{r.response_time:.2f}s"
            }
            for r in reversed(self.review_history[-10:])
        ]

    def get_metrics(self) -> Dict:
        """Return the current metrics with display-friendly keys and values."""
        return {
            'Total Reviews': self.metrics['total_reviews'],
            'Average Response Time': f"{self.metrics['avg_response_time']:.2f}s",
            'Reviews Today': self.metrics['reviews_today'],
            'Device': str(self.device) if self.device else "Not initialized"
        }
285
+
286
# Initialize reviewer
reviewer = CodeReviewer()

# Create Gradio interface: three tabs (review, history, metrics) driven by
# the module-level ``reviewer`` instance.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown("# Code Review Assistant")
    gr.Markdown("An automated code review system powered by Gemma-2b")

    with gr.Tabs():
        with gr.Tab("Review Code"):
            with gr.Row():
                with gr.Column():
                    code_input = gr.Textbox(
                        lines=10,
                        placeholder="Enter your code here...",
                        label="Code"
                    )
                    language_input = gr.Dropdown(
                        choices=["python", "javascript", "java", "cpp", "typescript", "go", "rust"],
                        value="python",
                        label="Language"
                    )
                    submit_btn = gr.Button("Submit for Review", variant="primary")
                with gr.Column():
                    output = gr.Textbox(
                        label="Review Results",
                        lines=10
                    )

        with gr.Tab("History"):
            with gr.Row():
                refresh_history = gr.Button("Refresh History", variant="secondary")
            history_output = gr.Textbox(
                label="Review History",
                lines=20,
                value="Click 'Refresh History' to view review history"
            )

        with gr.Tab("Metrics"):
            with gr.Row():
                refresh_metrics = gr.Button("Refresh Metrics", variant="secondary")
            metrics_output = gr.JSON(
                label="Performance Metrics"
            )

    @spaces.GPU
    def review_code_interface(code: str, language: str) -> str:
        """Validate the input, make sure the model is loaded, and run a review.

        Returns the review text, or a user-facing message when the input is
        empty or an error occurs.
        """
        # Treat a missing value like an empty one instead of raising on .strip().
        if not code or not code.strip():
            return "Please enter some code to review."
        try:
            reviewer.ensure_initialized()
            return reviewer.review_code(code, language)
        except Exception as e:
            logger.error(f"Interface error: {e}")
            return f"Error: {str(e)}"

    def get_history_interface() -> str:
        """Format the reviewer's recent history as one plain-text report."""
        try:
            history = reviewer.get_history()
            if not history:
                return "No reviews yet."
            # Build each entry once and join, rather than growing a string.
            entries = [
                f"Time: {review['timestamp']}\n"
                f"Language: {review['language']}\n"
                f"Response Time: {review['response_time']}\n"
                f"Code:\n```\n{review['code']}\n```\n"
                f"Suggestions:\n{review['suggestions']}\n"
                + "-" * 80 + "\n\n"
                for review in history
            ]
            return "".join(entries)
        except Exception as e:
            logger.error(f"History error: {e}")
            return "Error retrieving history"

    def get_metrics_interface() -> Dict:
        """Return the reviewer's metrics, or ``{"error": ...}`` on failure."""
        try:
            return reviewer.get_metrics()
        except Exception as e:
            logger.error(f"Metrics error: {e}")
            return {"error": str(e)}

    def update_all_outputs(code: str, language: str) -> tuple:
        """Update all outputs after code review."""
        result = review_code_interface(code, language)
        history = get_history_interface()
        metrics = get_metrics_interface()
        return result, history, metrics

    # Connect the interface
    submit_btn.click(
        update_all_outputs,
        inputs=[code_input, language_input],
        outputs=[output, history_output, metrics_output]
    )

    refresh_history.click(
        get_history_interface,
        outputs=history_output
    )

    refresh_metrics.click(
        get_metrics_interface,
        outputs=metrics_output
    )

    # Add example inputs
    gr.Examples(
        examples=[
            ["""def add_numbers(a, b):
    return a + b""", "python"],
            ["""function calculateSum(numbers) {
    let sum = 0;
    for(let i = 0; i < numbers.length; i++) {
        sum += numbers[i];
    }
    return sum;
}""", "javascript"]
        ],
        inputs=[code_input, language_input]
    )
415
+
416
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    # Listen on all interfaces on port 7860, surface errors in the UI, and
    # keep Gradio's startup output enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
        "quiet": False,
    }
    iface.launch(**launch_options)
requirements.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ gradio>=4.0.0
3
+ transformers>=4.39.0
4
+ torch>=2.0.0
5
+ accelerate>=0.27.2
6
+ safetensors>=0.4.2
7
+ sentencepiece>=0.1.99
8
+
9
+ # Model dependencies
10
+ einops>=0.7.0
11
+ scipy>=1.11.0
12
+
13
+ # Hugging Face
14
+ huggingface-hub>=0.20.3
15
+ spaces>=0.19.4
16
+
17
+ # Utilities
18
+ python-dotenv>=1.0.0
19
+ pydantic>=2.4.2
20
+ numpy<2.0.0
21
+ tqdm>=4.66.0
22
+ requests>=2.31.0
23
+
24
+ # Memory optimization
25
+ psutil>=5.9.0
26
+
27
+ # For Gemma model
28
+ google-cloud-aiplatform>=1.36.4