Refactor DB classes and management #5
@@ -12,9 +12,9 @@ from flask_jwt_extended import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from server.stat_gen import StatGen
|
from server.stat_gen import StatGen
|
||||||
from server.dataset_processor import DatasetProcessor
|
from server.enrichment import DatasetEnrichment
|
||||||
from server.exceptions import NotAuthorisedException, NotExistentDatasetException
|
from server.exceptions import NotAuthorisedException, NotExistentDatasetException
|
||||||
from db.database import PostgresConnector
|
from server.db.database import PostgresConnector
|
||||||
from server.auth import AuthManager
|
from server.auth import AuthManager
|
||||||
from server.utils import get_request_filters, get_dataset_and_validate
|
from server.utils import get_request_filters, get_dataset_and_validate
|
||||||
|
|
||||||
@@ -130,7 +130,7 @@ def upload_data():
|
|||||||
posts_df = pd.read_json(post_file, lines=True, convert_dates=False)
|
posts_df = pd.read_json(post_file, lines=True, convert_dates=False)
|
||||||
topics = json.load(topic_file)
|
topics = json.load(topic_file)
|
||||||
|
|
||||||
processor = DatasetProcessor(posts_df, topics)
|
processor = DatasetEnrichment(posts_df, topics)
|
||||||
enriched_df = processor.enrich()
|
enriched_df = processor.enrich()
|
||||||
dataset_id = db.save_dataset_info(
|
dataset_id = db.save_dataset_info(
|
||||||
current_user, f"dataset_{current_user}", topics
|
current_user, f"dataset_{current_user}", topics
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from db.database import PostgresConnector
|
from server.db.database import PostgresConnector
|
||||||
from flask_bcrypt import Bcrypt
|
from flask_bcrypt import Bcrypt
|
||||||
|
|
||||||
class AuthManager:
|
class AuthManager:
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ import psycopg2
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from psycopg2.extras import RealDictCursor
|
from psycopg2.extras import RealDictCursor
|
||||||
from psycopg2.extras import execute_batch, Json
|
from psycopg2.extras import execute_batch, Json
|
||||||
from server.exceptions import NotExistentDatasetException
|
|
||||||
|
|
||||||
|
|
||||||
class PostgresConnector:
|
class PostgresConnector:
|
||||||
@@ -68,7 +67,6 @@ class PostgresConnector:
|
|||||||
type,
|
type,
|
||||||
parent_id,
|
parent_id,
|
||||||
author,
|
author,
|
||||||
title,
|
|
||||||
content,
|
content,
|
||||||
timestamp,
|
timestamp,
|
||||||
date,
|
date,
|
||||||
@@ -90,8 +88,7 @@ class PostgresConnector:
|
|||||||
%s, %s, %s, %s, %s,
|
%s, %s, %s, %s, %s,
|
||||||
%s, %s, %s, %s, %s,
|
%s, %s, %s, %s, %s,
|
||||||
%s, %s, %s, %s, %s,
|
%s, %s, %s, %s, %s,
|
||||||
%s, %s, %s, %s, %s,
|
%s, %s, %s, %s, %s
|
||||||
%s
|
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -103,7 +100,6 @@ class PostgresConnector:
|
|||||||
row["type"],
|
row["type"],
|
||||||
row["parent_id"],
|
row["parent_id"],
|
||||||
row["author"],
|
row["author"],
|
||||||
row.get("title"),
|
|
||||||
row["content"],
|
row["content"],
|
||||||
row["timestamp"],
|
row["timestamp"],
|
||||||
row["date"],
|
row["date"],
|
||||||
@@ -114,7 +110,7 @@ class PostgresConnector:
|
|||||||
row["source"],
|
row["source"],
|
||||||
row.get("topic"),
|
row.get("topic"),
|
||||||
row.get("topic_confidence"),
|
row.get("topic_confidence"),
|
||||||
Json(row["entities"]) if row.get("entities") else None,
|
Json(row["ner_entities"]) if row.get("ner_entities") else None,
|
||||||
row.get("emotion_anger"),
|
row.get("emotion_anger"),
|
||||||
row.get("emotion_disgust"),
|
row.get("emotion_disgust"),
|
||||||
row.get("emotion_fear"),
|
row.get("emotion_fear"),
|
||||||
@@ -130,19 +126,12 @@ class PostgresConnector:
|
|||||||
def get_dataset_content(self, dataset_id: int) -> pd.DataFrame:
|
def get_dataset_content(self, dataset_id: int) -> pd.DataFrame:
|
||||||
query = "SELECT * FROM events WHERE dataset_id = %s"
|
query = "SELECT * FROM events WHERE dataset_id = %s"
|
||||||
result = self.execute(query, (dataset_id,), fetch=True)
|
result = self.execute(query, (dataset_id,), fetch=True)
|
||||||
|
|
||||||
if result:
|
|
||||||
return pd.DataFrame(result)
|
return pd.DataFrame(result)
|
||||||
|
|
||||||
raise NotExistentDatasetException("Dataset does not exist")
|
|
||||||
|
|
||||||
def get_dataset_info(self, dataset_id: int) -> dict:
|
def get_dataset_info(self, dataset_id: int) -> dict:
|
||||||
query = "SELECT * FROM datasets WHERE id = %s"
|
query = "SELECT * FROM datasets WHERE id = %s"
|
||||||
result = self.execute(query, (dataset_id,), fetch=True)
|
result = self.execute(query, (dataset_id,), fetch=True)
|
||||||
if result:
|
return result[0] if result else None
|
||||||
return result[0]
|
|
||||||
|
|
||||||
raise NotExistentDatasetException("Dataset does not exist")
|
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
if self.connection:
|
if self.connection:
|
||||||
@@ -30,10 +30,7 @@ CREATE TABLE events (
|
|||||||
hour INTEGER NOT NULL,
|
hour INTEGER NOT NULL,
|
||||||
weekday VARCHAR(255) NOT NULL,
|
weekday VARCHAR(255) NOT NULL,
|
||||||
|
|
||||||
/* Posts Only */
|
/* Comments and Replies */
|
||||||
title VARCHAR(255),
|
|
||||||
|
|
||||||
/* Comments Only*/
|
|
||||||
parent_id VARCHAR(255),
|
parent_id VARCHAR(255),
|
||||||
reply_to VARCHAR(255),
|
reply_to VARCHAR(255),
|
||||||
source VARCHAR(255) NOT NULL,
|
source VARCHAR(255) NOT NULL,
|
||||||
@@ -2,7 +2,7 @@ import pandas as pd
|
|||||||
|
|
||||||
from server.analysis.nlp import NLP
|
from server.analysis.nlp import NLP
|
||||||
|
|
||||||
class DatasetProcessor:
|
class DatasetEnrichment:
|
||||||
def __init__(self, df, topics):
|
def __init__(self, df, topics):
|
||||||
self.df = self._explode_comments(df)
|
self.df = self._explode_comments(df)
|
||||||
self.topics = topics
|
self.topics = topics
|
||||||
@@ -1,5 +1,3 @@
|
|||||||
import datetime
|
|
||||||
|
|
||||||
import nltk
|
import nltk
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from nltk.corpus import stopwords
|
from nltk.corpus import stopwords
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import pandas as pd
|
|||||||
|
|
||||||
from flask import request
|
from flask import request
|
||||||
from flask_jwt_extended import get_jwt_identity
|
from flask_jwt_extended import get_jwt_identity
|
||||||
from db.database import PostgresConnector
|
from server.db.database import PostgresConnector
|
||||||
from server.exceptions import NotAuthorisedException
|
from server.exceptions import NotAuthorisedException
|
||||||
|
|
||||||
def parse_datetime_filter(value):
|
def parse_datetime_filter(value):
|
||||||
|
|||||||
Reference in New Issue
Block a user