MetaCell
diff --git a/‎applications/virtual-fly-brain/Dockerfile‎
Lines changed: 22 additions & 1 deletion b/‎applications/virtual-fly-brain/Dockerfile‎
Lines changed: 22 additions & 1 deletion
diff --git a/‎applications/virtual-fly-brain/backend/README_LOGGING.md‎
Lines changed: 136 additions & 0 deletions b/‎applications/virtual-fly-brain/backend/README_LOGGING.md‎
Lines changed: 136 additions & 0 deletions
diff --git a/‎applications/virtual-fly-brain/backend/docker-entrypoint.sh‎
Lines changed: 11 additions & 0 deletions b/‎applications/virtual-fly-brain/backend/docker-entrypoint.sh‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎applications/virtual-fly-brain/backend/requirements.txt‎
Lines changed: 1 addition & 1 deletion b/‎applications/virtual-fly-brain/backend/requirements.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎applications/virtual-fly-brain/backend/vfb_cache_cleanup.sh‎
Lines changed: 12 additions & 0 deletions b/‎applications/virtual-fly-brain/backend/vfb_cache_cleanup.sh‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎applications/virtual-fly-brain/backend/virtual_fly_brain/__main__.py‎
Lines changed: 83 additions & 26 deletions b/‎applications/virtual-fly-brain/backend/virtual_fly_brain/__main__.py‎
Lines changed: 83 additions & 26 deletions
@@ -20,6 +20,11 @@ RUN apt-get update && apt-get install -y \
     libosmesa6 \
     freeglut3-dev \
     git \
+    cron \
+    x11-utils \
+    mesa-utils \
+    libx11-dev \
+    libxext-dev \
     && rm -rf /var/lib/apt/lists/*
 
 RUN apt-get remove -y nodejs npm || true
@@ -33,6 +38,8 @@ RUN npm install -g yarn && \
 
 # Set up virtual display for OpenGL
 ENV DISPLAY=:99
+ENV LIBGL_ALWAYS_INDIRECT=1
+ENV MESA_GL_VERSION_OVERRIDE=3.3
 
 COPY frontend ${BUILDDIR}
 WORKDIR ${BUILDDIR}
@@ -48,7 +55,21 @@ COPY backend/ /usr/src/app
 RUN pip3 install -e .
 
 RUN cp -r ${BUILDDIR}/dist/ /usr/src/app/virtual_fly_brain/www
+
+# Set up cache cleanup cron job
+COPY backend/vfb_cache_cleanup.sh /usr/local/bin/vfb_cache_cleanup.sh
+RUN chmod +x /usr/local/bin/vfb_cache_cleanup.sh
+
+# Copy and set up entrypoint script
+COPY backend/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
+RUN chmod +x /usr/local/bin/docker-entrypoint.sh
+
+# Create cron job to run cache cleanup daily at 2 AM
+RUN echo "0 2 * * * /usr/local/bin/vfb_cache_cleanup.sh" | crontab -
+
+# Create log directory for cache cleanup logs
+RUN mkdir -p /var/log && touch /var/log/vfb_cache_cleanup.log
 WORKDIR /usr/src/app/virtual_fly_brain
 
 EXPOSE 8080
-ENTRYPOINT ["gunicorn", "--log-level=info", "--preload", "--bind=0.0.0.0:8080", "virtual_fly_brain.__main__:app"]
+ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh", "gunicorn", "--log-level=info", "--preload", "--bind=0.0.0.0:8080", "--timeout=120", "--graceful-timeout=30", "--keep-alive=5", "virtual_fly_brain.__main__:app"]
@@ -0,0 +1,136 @@
+# API Request Logging
+
+This document describes the comprehensive logging system implemented for the Virtual Fly Brain REST API.
+
+## Overview
+
+The Flask application now includes detailed logging for all REST endpoint requests. This logging is designed to be visible in Kubernetes pod logs and provides comprehensive tracking of API usage.
+
+## Features
+
+### Request Tracking
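+- **Unique Request IDs**: Each request is tagged with an identifier built from an ISO 8601 timestamp followed by an underscore and the microsecond component of the current time (e.g. `2024-01-15T10:30:45.123456_567890`)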
+- **Complete Request Data**: Logs include all input parameters, headers, and metadata
+- **Response Tracking**: Success/error status and response times are logged
+- **Error Details**: Detailed error information including exception type and message
+
+### Log Format
+All logs use structured JSON format for easy parsing and analysis:
+
+```json
+{
+  "request_id": "2024-01-15T10:30:45.123456_567890",
+  "endpoint": "term_info",
+  "method": "GET",
+  "url": "http://localhost:8080/get_term_info?id=FBbt_00003624",
+  "path": "/get_term_info",
+  "remote_addr": "192.168.1.100",
+  "user_agent": "Mozilla/5.0...",
+  "args": {"id": "FBbt_00003624"},
+  "form_data": {},
+  "json_data": null,
+  "content_length": null
+}
+```
+
+## Logged Endpoints
+
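+The following REST endpoints are the ones targeted by the `@log_request` decorator. Note that in `__main__.py` the decorator lines are currently commented out (`# @log_request`), so they must be re-enabled before these endpoints emit the request logs described here: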
+
+- `GET /` - Static file serving (index.html)
+- `GET /get_instances` - Get instances by short_form parameter
+- `GET /get_term_info` - Get term information by id parameter
+- `GET /run_query` - Run queries by id and query_type parameters
+
+## Log Types
+
+### REQUEST_START
+Logged when a request begins processing:
+- Request metadata (URL, method, headers)
+- Input parameters (query args, form data, JSON body)
+- Client information (IP, User-Agent)
+
+### REQUEST_END
+Logged when a request completes successfully:
+- Request ID (matches REQUEST_START)
+- Processing duration in milliseconds
+- Response status code
+
+### REQUEST_ERROR
+Logged when a request fails:
+- Request ID (matches REQUEST_START)
+- Processing duration until error
+- Exception type and error message
+
+## Example Log Output
+
+```
+2024-01-15 10:30:45,123 - vfb_api - INFO - REQUEST_START: {"request_id": "2024-01-15T10:30:45.123456_567890", "endpoint": "term_info", "method": "GET", "url": "http://localhost:8080/get_term_info?id=FBbt_00003624", "path": "/get_term_info", "remote_addr": "192.168.1.100", "user_agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36", "args": {"id": "FBbt_00003624"}, "form_data": {}, "json_data": null, "content_length": null}
+
+2024-01-15 10:30:45,456 - vfb_api - INFO - REQUEST_END: {"request_id": "2024-01-15T10:30:45.123456_567890", "status": "SUCCESS", "duration_ms": 333.45, "response_status": 200}
+```
+
+## Kubernetes Integration
+
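+Log records are emitted through a `logging.StreamHandler`, which writes to the container's standard error stream by default. Kubernetes captures both stdout and stderr, so the logs are automatically available through: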
+
+```bash
+# View real-time logs
+kubectl logs -f <pod-name>
+
+# View logs with timestamps
+kubectl logs --timestamps <pod-name>
+
+# View recent logs
+kubectl logs --tail=100 <pod-name>
+
+# Follow logs for a specific container
+kubectl logs -f <pod-name> -c <container-name>
+```
+
+## Log Analysis
+
+The structured JSON format allows for easy log analysis:
+
+```bash
+# Filter for specific endpoints
+kubectl logs <pod-name> | grep "get_term_info"
+
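+# Extract request IDs for debugging (strip the text prefix so jq receives pure JSON)
+kubectl logs <pod-name> | grep "REQUEST_START" | sed 's/^[^{]*//' | jq -r '.request_id'
+
+# Monitor response times (strip the text prefix so jq receives pure JSON)
+kubectl logs <pod-name> | grep "REQUEST_END" | sed 's/^[^{]*//' | jq '.duration_ms'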
+
+# Track errors
+kubectl logs <pod-name> | grep "REQUEST_ERROR"
+```
+
+## Configuration
+
+The logging system is configured in `__main__.py`:
+
+- **Log Level**: INFO (captures all request information)
+- **Format**: Timestamped with logger name and level
+- **Handler**: StreamHandler (writes to stderr by default, which Kubernetes captures in the pod logs)
+- **Logger Name**: `vfb_api`
+
+## Implementation Details
+
+The logging is implemented using a Python decorator `@log_request` that:
+
+1. Captures request start time and generates unique ID
+2. Extracts all request data (URL, parameters, headers)
+3. Logs REQUEST_START with complete request information
+4. Executes the endpoint function
+5. Logs REQUEST_END with timing and status information
+6. Handles exceptions and logs REQUEST_ERROR with error details
+
+## Benefits
+
+- **Production Monitoring**: Track API usage patterns and performance
+- **Debugging**: Trace specific requests through the system
+- **Performance Analysis**: Monitor response times and identify bottlenecks
+- **Error Tracking**: Capture and analyze application errors
+- **Compliance**: Maintain audit trail of API access
+- **Capacity Planning**: Understand load patterns and peak usage
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Container entrypoint: start cron, prepare the cache directories, then hand
+# control to the command passed as arguments.
+
+# Start cron service in the background. A failure here should not stop the
+# application from starting, so only emit a warning.
+if ! service cron start; then
+    echo "WARNING: failed to start cron; scheduled jobs will not run" >&2
+fi
+
+# Create cache directories with proper permissions
+mkdir -p /tmp/vfb_cache/term_info /tmp/vfb_cache/queries
+chmod 755 /tmp/vfb_cache /tmp/vfb_cache/term_info /tmp/vfb_cache/queries
+
+# Start the main application; exec replaces this shell with the given command
+exec "$@"
@@ -16,4 +16,4 @@ dataclasses_json >= 0.5.7
 dacite >= 1.6.0
 pandas >= 1.0.0
 numpy >= 1.19.0
-git+https://github.com/VirtualFlyBrain/VFBquery.git
+vfbquery==0.3.4
@@ -0,0 +1,12 @@
+#!/bin/bash
+# VFB Cache Cleanup Job
+# This script cleans up expired cache files and can be run as a cron job.
+# NOTE(review): cron runs jobs with a minimal environment -- confirm that
+# `python3` resolves to the interpreter that has the application installed.
+
+LOG_FILE=/var/log/vfb_cache_cleanup.log
+
+# Set working directory to the application root; abort if it is unavailable
+if ! cd /usr/src/app; then
+    echo "$(date): VFB cache cleanup failed: cannot cd to /usr/src/app" >> "$LOG_FILE"
+    exit 1
+fi
+
+# Run cache cleanup using the installed Python environment, keeping its
+# output in the log, and record whether it actually succeeded.
+if python3 -m virtual_fly_brain.services.cache_manager cleanup >> "$LOG_FILE" 2>&1; then
+    echo "$(date): VFB cache cleanup completed" >> "$LOG_FILE"
+else
+    status=$?
+    echo "$(date): VFB cache cleanup failed (exit code ${status})" >> "$LOG_FILE"
+    exit "$status"
+fi
@@ -4,6 +4,10 @@
 import werkzeug
 import numpy as np
 import pandas as pd
+import logging
+import time
+from datetime import datetime
+from virtual_fly_brain.services.numpy_encoder import NumpyEncoder
 vfb = None
 try:
     import vfbquery as vfb
@@ -13,41 +17,91 @@
 from virtual_fly_brain.services.queries import run_query
 from virtual_fly_brain.services.term_info import get_term_info
 
-class NumpyEncoder(json.JSONEncoder):
-    """ Custom encoder for numpy data types """
-    def default(self, obj):
-        if isinstance(obj, (np.int_, np.intc, np.intp, np.int8,
-                            np.int16, np.int32, np.int64, np.uint8,
-                            np.uint16, np.uint32, np.uint64)):
 
-            return int(obj)
-
-        elif isinstance(obj, (np.float_, np.float16, np.float32, np.float64)):
-            return float(obj)
-
-        elif isinstance(obj, (np.complex_, np.complex64, np.complex128)):
-            return {'real': obj.real, 'imag': obj.imag}
-
-        elif isinstance(obj, (np.ndarray,)):
-            return obj.tolist()
-
-        elif isinstance(obj, (np.bool_)):
-            return bool(obj)
-
-        elif isinstance(obj, (np.void)):
-            return None
-
-        return json.JSONEncoder.default(self, obj)
+# Configure logging (runs once, when this module is imported)
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        # NOTE: StreamHandler() with no stream argument writes to sys.stderr,
+        # not stdout. Container runtimes capture both streams, so these
+        # records still show up in the pod logs.
+        logging.StreamHandler()
+    ]
+)
+
+# Create logger for API requests
+api_logger = logging.getLogger('vfb_api')
+
+
+def _response_status(result):
+    """Return the HTTP status implied by a view function's return value."""
+    # Response objects carry their own status code.
+    status = getattr(result, 'status_code', None)
+    if status is not None:
+        return status
+    # Flask views may also return ``(body, status)`` or ``(body, status, headers)``;
+    # in ``(body, headers)`` the second item is a dict/list and is ignored here.
+    if isinstance(result, tuple) and len(result) >= 2 and isinstance(result[1], (int, str)):
+        return result[1]
+    return 200
+
+
+def log_request(func):
+    """Decorator to log REST endpoint requests with input data.
+
+    Emits a ``REQUEST_START`` record before calling ``func``, then either a
+    ``REQUEST_END`` record (with duration and response status) or a
+    ``REQUEST_ERROR`` record (with exception type and message). Exceptions
+    raised by ``func`` are logged and re-raised unchanged.
+
+    Args:
+        func: The Flask view function to wrap.
+
+    Returns:
+        A wrapper with the same name and metadata as ``func``.
+    """
+    import functools  # local import keeps this block self-contained
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        # perf_counter is monotonic, so durations are immune to clock changes.
+        started = time.perf_counter()
+        request_id = f"{datetime.now().isoformat()}_{int(time.time() * 1000000) % 1000000}"
+        request = flask.request
+
+        # Log request start
+        request_data = {
+            'request_id': request_id,
+            'endpoint': request.endpoint,
+            'method': request.method,
+            'url': request.url,
+            'path': request.path,
+            'remote_addr': request.remote_addr,
+            'user_agent': request.headers.get('User-Agent', ''),
+            'args': dict(request.args),
+            'form_data': dict(request.form) if request.form else {},
+            'json_data': request.get_json(silent=True) if request.is_json else None,
+            'content_length': request.content_length,
+        }
+        api_logger.info("REQUEST_START: %s", json.dumps(request_data, cls=NumpyEncoder))
+
+        try:
+            # Execute the actual endpoint function
+            result = func(*args, **kwargs)
+        except Exception as e:
+            # Log error response, then let Flask's error handling take over
+            error_data = {
+                'request_id': request_id,
+                'status': 'ERROR',
+                'duration_ms': round((time.perf_counter() - started) * 1000, 2),
+                'error_type': type(e).__name__,
+                'error_message': str(e),
+            }
+            api_logger.error("REQUEST_ERROR: %s", json.dumps(error_data, cls=NumpyEncoder))
+            raise
+
+        # Log successful response
+        response_data = {
+            'request_id': request_id,
+            'status': 'SUCCESS',
+            'duration_ms': round((time.perf_counter() - started) * 1000, 2),
+            'response_status': _response_status(result),
+        }
+        api_logger.info("REQUEST_END: %s", json.dumps(response_data, cls=NumpyEncoder))
+        return result
+
+    return wrapper
 
 
 def init_webapp_routes(app):
     @app.route('/', methods=['GET'])
+    # @log_request
     def index():
         return flask.send_from_directory("www", 'index.html')
 
 
     @app.route('/get_instances', methods=['GET'])
     @cross_origin(supports_credentials=True)
+    # @log_request
     def instances():
         short_form = flask.request.args.get('short_form')
         if not short_form:
@@ -66,6 +120,7 @@ def instances():
 
     @app.route('/get_term_info', methods=['GET'])
     @cross_origin(supports_credentials=True)
+    # @log_request
     def term_info():
         term_id = flask.request.args.get('id')
         if not term_id:
@@ -82,6 +137,7 @@ def term_info():
 
     @app.route('/run_query', methods=['GET'])
     @cross_origin(supports_credentials=True)
+    # @log_request
     def get_query_results():
         term_id = flask.request.args.get('id')
         query_type = flask.request.args.get('query_type')
@@ -120,10 +176,11 @@ def handle_bad_request(e):
     )
 CORS(app, support_credentials=True)
 init_webapp_routes(app)
-# TODO: fix this to use the init_flask function from cloudharness
-#     app = init_flask(title="Virtual Fly Brain REST API", webapp=True, init_app_fn=init_webapp_routes)
+
 
 def main():
+    """Log two startup messages, then serve ``app`` on 0.0.0.0:8080 via ``app.run``."""
+    api_logger.info("Starting Virtual Fly Brain REST API server on host='0.0.0.0', port=8080")
+    api_logger.info("Logging is configured for API request tracking")
     app.run(host='0.0.0.0', port=8080)
 
 if __name__ == '__main__':