Compare commits

...

15 Commits

Author SHA1 Message Date
257eb80de7 feat(api): add average thread length per emotion 2026-02-23 19:09:48 +00:00
3a23b1f0c8 feat(api): add average thread depth 2026-02-23 18:14:34 +00:00
8c76476cd3 fix(api): broken analysis calls due to overlap in attribute and method names 2026-02-23 18:14:24 +00:00
397986dc89 refactor(frontend): rename InteractionStats to UserStats 2026-02-23 17:15:14 +00:00
04b7094036 feat(api): add cultural endpoint 2026-02-23 17:14:12 +00:00
c11b4bb85b refactor: move NLP to analysis dir 2026-02-17 18:51:15 +00:00
289f4254db fix(backend): broken null timestamp handling 2026-02-17 18:49:03 +00:00
ed0dd8cdbc feat(nlp): add Named Entity Recognition to dataset 2026-02-17 18:48:45 +00:00
8fbf32b67c feat(linguistic): add most common 2, 3 length n-grams 2026-02-17 18:26:40 +00:00
d27ba3fca4 refactor: extract interaction and linguistic analysis into dedicated classes 2026-02-17 18:00:16 +00:00
83010aee55 refactor: extract emotional analysis out of stat_gen 2026-02-17 17:40:29 +00:00
70b34036db refactor: extract temporal analysis into its own class 2026-02-17 17:35:28 +00:00
563212c98e perf(frontend): add filter for low interaction graphs & deleted users 2026-02-16 17:09:22 +00:00
4f577abd4f feat(frontend): add 3d interaction graph 2026-02-16 17:03:51 +00:00
7c1e069152 fix(backend): comment parsing didn't account for NaN values 2026-02-16 16:41:16 +00:00
12 changed files with 1005 additions and 257 deletions

View File

@@ -15,6 +15,7 @@
"headlessui": "^0.0.0",
"react": "^19.2.0",
"react-dom": "^19.2.0",
"react-force-graph-3d": "^1.29.1",
"react-router-dom": "^7.13.0",
"recharts": "^3.7.0"
},
@@ -267,6 +268,15 @@
"@babel/core": "^7.0.0-0"
}
},
"node_modules/@babel/runtime": {
"version": "7.28.6",
"resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.6.tgz",
"integrity": "sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==",
"license": "MIT",
"engines": {
"node": ">=6.9.0"
}
},
"node_modules/@babel/template": {
"version": "7.28.6",
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz",
@@ -2022,6 +2032,12 @@
"url": "https://github.com/sponsors/tannerlinsley"
}
},
"node_modules/@tweenjs/tween.js": {
"version": "25.0.0",
"resolved": "https://registry.npmjs.org/@tweenjs/tween.js/-/tween.js-25.0.0.tgz",
"integrity": "sha512-XKLA6syeBUaPzx4j3qwMqzzq+V4uo72BnlbOjmuljLrRqdsd3qnzvZZoxvMHZ23ndsRS4aufU6JOZYpCbU6T1A==",
"license": "MIT"
},
"node_modules/@types/babel__core": {
"version": "7.20.5",
"resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz",
@@ -2488,6 +2504,31 @@
"vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0"
}
},
"node_modules/3d-force-graph": {
"version": "1.79.1",
"resolved": "https://registry.npmjs.org/3d-force-graph/-/3d-force-graph-1.79.1.tgz",
"integrity": "sha512-iscIVt4jWjJ11KEEswgOIOWk8Ew4EFKHRyERJXJ0ouycqzHCtWwb9E5imnxS5rYF1f1IESkFNAfB+h3EkU0Irw==",
"license": "MIT",
"dependencies": {
"accessor-fn": "1",
"kapsule": "^1.16",
"three": ">=0.118 <1",
"three-forcegraph": "1",
"three-render-objects": "^1.35"
},
"engines": {
"node": ">=12"
}
},
"node_modules/accessor-fn": {
"version": "1.5.3",
"resolved": "https://registry.npmjs.org/accessor-fn/-/accessor-fn-1.5.3.tgz",
"integrity": "sha512-rkAofCwe/FvYFUlMB0v0gWmhqtfAtV1IUkdPbfhTUyYniu5LrC0A0UJkTH0Jv3S8SvwkmfuAlY+mQIJATdocMA==",
"license": "MIT",
"engines": {
"node": ">=12"
}
},
"node_modules/acorn": {
"version": "8.15.0",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
@@ -2793,6 +2834,12 @@
"node": ">=12"
}
},
"node_modules/d3-binarytree": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/d3-binarytree/-/d3-binarytree-1.0.2.tgz",
"integrity": "sha512-cElUNH+sHu95L04m92pG73t2MEJXKu+GeKUN1TJkFsu93E5W8E9Sc3kHEGJKgenGvj19m6upSn2EunvMgMD2Yw==",
"license": "MIT"
},
"node_modules/d3-cloud": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/d3-cloud/-/d3-cloud-1.2.8.tgz",
@@ -2826,6 +2873,22 @@
"node": ">=12"
}
},
"node_modules/d3-force-3d": {
"version": "3.0.6",
"resolved": "https://registry.npmjs.org/d3-force-3d/-/d3-force-3d-3.0.6.tgz",
"integrity": "sha512-4tsKHUPLOVkyfEffZo1v6sFHvGFwAIIjt/W8IThbp08DYAsXZck+2pSHEG5W1+gQgEvFLdZkYvmJAbRM2EzMnA==",
"license": "MIT",
"dependencies": {
"d3-binarytree": "1",
"d3-dispatch": "1 - 3",
"d3-octree": "1",
"d3-quadtree": "1 - 3",
"d3-timer": "1 - 3"
},
"engines": {
"node": ">=12"
}
},
"node_modules/d3-format": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz",
@@ -2847,6 +2910,12 @@
"node": ">=12"
}
},
"node_modules/d3-octree": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/d3-octree/-/d3-octree-1.1.0.tgz",
"integrity": "sha512-F8gPlqpP+HwRPMO/8uOu5wjH110+6q4cgJvgJT6vlpy3BEaDIKlTZrgHKZSp/i1InRpVfh4puY/kvL6MxK930A==",
"license": "MIT"
},
"node_modules/d3-path": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/d3-path/-/d3-path-3.1.0.tgz",
@@ -2856,6 +2925,15 @@
"node": ">=12"
}
},
"node_modules/d3-quadtree": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/d3-quadtree/-/d3-quadtree-3.0.1.tgz",
"integrity": "sha512-04xDrxQTDTCFwP5H6hRhsRcb9xxv2RzkcsygFzmkSIOJy3PeRJP7sNk3VRIbKXcog561P9oU0/rVH6vDROAgUw==",
"license": "ISC",
"engines": {
"node": ">=12"
}
},
"node_modules/d3-scale": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz",
@@ -2958,6 +3036,18 @@
"d3-selection": "2 - 3"
}
},
"node_modules/data-bind-mapper": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/data-bind-mapper/-/data-bind-mapper-1.0.3.tgz",
"integrity": "sha512-QmU3lyEnbENQPo0M1F9BMu4s6cqNNp8iJA+b/HP2sSb7pf3dxwF3+EP1eO69rwBfH9kFJ1apmzrtogAmVt2/Xw==",
"license": "MIT",
"dependencies": {
"accessor-fn": "1"
},
"engines": {
"node": ">=12"
}
},
"node_modules/debug": {
"version": "4.4.3",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
@@ -3419,6 +3509,20 @@
"dev": true,
"license": "ISC"
},
"node_modules/float-tooltip": {
"version": "1.7.5",
"resolved": "https://registry.npmjs.org/float-tooltip/-/float-tooltip-1.7.5.tgz",
"integrity": "sha512-/kXzuDnnBqyyWyhDMH7+PfP8J/oXiAavGzcRxASOMRHFuReDtofizLLJsf7nnDLAfEaMW4pVWaXrAjtnglpEkg==",
"license": "MIT",
"dependencies": {
"d3-selection": "2 - 3",
"kapsule": "^1.16",
"preact": "10"
},
"engines": {
"node": ">=12"
}
},
"node_modules/follow-redirects": {
"version": "1.15.11",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz",
@@ -3722,11 +3826,19 @@
"dev": true,
"license": "ISC"
},
"node_modules/jerrypick": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/jerrypick/-/jerrypick-1.1.2.tgz",
"integrity": "sha512-YKnxXEekXKzhpf7CLYA0A+oDP8V0OhICNCr5lv96FvSsDEmrb0GKM776JgQvHTMjr7DTTPEVv/1Ciaw0uEWzBA==",
"license": "MIT",
"engines": {
"node": ">=12"
}
},
"node_modules/js-tokens": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
"integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==",
"dev": true,
"license": "MIT"
},
"node_modules/js-yaml": {
@@ -3789,6 +3901,18 @@
"node": ">=6"
}
},
"node_modules/kapsule": {
"version": "1.16.3",
"resolved": "https://registry.npmjs.org/kapsule/-/kapsule-1.16.3.tgz",
"integrity": "sha512-4+5mNNf4vZDSwPhKprKwz3330iisPrb08JyMgbsdFrimBCKNHecua/WBwvVg3n7vwx0C1ARjfhwIpbrbd9n5wg==",
"license": "MIT",
"dependencies": {
"lodash-es": "4"
},
"engines": {
"node": ">=12"
}
},
"node_modules/keyv": {
"version": "4.5.4",
"resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz",
@@ -3835,6 +3959,12 @@
"integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==",
"license": "MIT"
},
"node_modules/lodash-es": {
"version": "4.17.23",
"resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.23.tgz",
"integrity": "sha512-kVI48u3PZr38HdYz98UmfPnXl2DXrpdctLrFLCd3kOx1xUkOmpFPx7gCWWM5MPkL/fD8zb+Ph0QzjGFs4+hHWg==",
"license": "MIT"
},
"node_modules/lodash.debounce": {
"version": "4.0.8",
"resolved": "https://registry.npmjs.org/lodash.debounce/-/lodash.debounce-4.0.8.tgz",
@@ -3848,6 +3978,18 @@
"dev": true,
"license": "MIT"
},
"node_modules/loose-envify": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
"integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==",
"license": "MIT",
"dependencies": {
"js-tokens": "^3.0.0 || ^4.0.0"
},
"bin": {
"loose-envify": "cli.js"
}
},
"node_modules/lru-cache": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz",
@@ -3934,6 +4076,44 @@
"dev": true,
"license": "MIT"
},
"node_modules/ngraph.events": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/ngraph.events/-/ngraph.events-1.4.0.tgz",
"integrity": "sha512-NeDGI4DSyjBNBRtA86222JoYietsmCXbs8CEB0dZ51Xeh4lhVl1y3wpWLumczvnha8sFQIW4E0vvVWwgmX2mGw==",
"license": "BSD-3-Clause"
},
"node_modules/ngraph.forcelayout": {
"version": "3.3.1",
"resolved": "https://registry.npmjs.org/ngraph.forcelayout/-/ngraph.forcelayout-3.3.1.tgz",
"integrity": "sha512-MKBuEh1wujyQHFTW57y5vd/uuEOK0XfXYxm3lC7kktjJLRdt/KEKEknyOlc6tjXflqBKEuYBBcu7Ax5VY+S6aw==",
"license": "BSD-3-Clause",
"dependencies": {
"ngraph.events": "^1.0.0",
"ngraph.merge": "^1.0.0",
"ngraph.random": "^1.0.0"
}
},
"node_modules/ngraph.graph": {
"version": "20.1.2",
"resolved": "https://registry.npmjs.org/ngraph.graph/-/ngraph.graph-20.1.2.tgz",
"integrity": "sha512-W/G3GBR3Y5UxMLHTUCPP9v+pbtpzwuAEIqP5oZV+9IwgxAIEZwh+Foc60iPc1idlnK7Zxu0p3puxAyNmDvBd0Q==",
"license": "BSD-3-Clause",
"dependencies": {
"ngraph.events": "^1.4.0"
}
},
"node_modules/ngraph.merge": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/ngraph.merge/-/ngraph.merge-1.0.0.tgz",
"integrity": "sha512-5J8YjGITUJeapsomtTALYsw7rFveYkM+lBj3QiYZ79EymQcuri65Nw3knQtFxQBU1r5iOaVRXrSwMENUPK62Vg==",
"license": "MIT"
},
"node_modules/ngraph.random": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/ngraph.random/-/ngraph.random-1.2.0.tgz",
"integrity": "sha512-4EUeAGbB2HWX9njd6bP6tciN6ByJfoaAvmVL9QTaZSeXrW46eNGA9GajiXiPBbvFqxUWFkEbyo6x5qsACUuVfA==",
"license": "BSD-3-Clause"
},
"node_modules/node-releases": {
"version": "2.0.27",
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.27.tgz",
@@ -3941,6 +4121,15 @@
"dev": true,
"license": "MIT"
},
"node_modules/object-assign": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
"integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/optionator": {
"version": "0.9.4",
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
@@ -4044,6 +4233,18 @@
"url": "https://github.com/sponsors/jonschlinkert"
}
},
"node_modules/polished": {
"version": "4.3.1",
"resolved": "https://registry.npmjs.org/polished/-/polished-4.3.1.tgz",
"integrity": "sha512-OBatVyC/N7SCW/FaDHrSd+vn0o5cS855TOmYi4OkdWUMSJCET/xip//ch8xGUvtr3i44X9LVyWwQlRMTN3pwSA==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.17.8"
},
"engines": {
"node": ">=10"
}
},
"node_modules/postcss": {
"version": "8.5.6",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz",
@@ -4073,6 +4274,16 @@
"node": "^10 || ^12 || >=14"
}
},
"node_modules/preact": {
"version": "10.28.3",
"resolved": "https://registry.npmjs.org/preact/-/preact-10.28.3.tgz",
"integrity": "sha512-tCmoRkPQLpBeWzpmbhryairGnhW9tKV6c6gr/w+RhoRoKEJwsjzipwp//1oCpGPOchvSLaAPlpcJi9MwMmoPyA==",
"license": "MIT",
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/preact"
}
},
"node_modules/prelude-ls": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz",
@@ -4083,6 +4294,23 @@
"node": ">= 0.8.0"
}
},
"node_modules/prop-types": {
"version": "15.8.1",
"resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz",
"integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==",
"license": "MIT",
"dependencies": {
"loose-envify": "^1.4.0",
"object-assign": "^4.1.1",
"react-is": "^16.13.1"
}
},
"node_modules/prop-types/node_modules/react-is": {
"version": "16.13.1",
"resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
"integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==",
"license": "MIT"
},
"node_modules/proxy-from-env": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
@@ -4120,6 +4348,23 @@
"react": "^19.2.4"
}
},
"node_modules/react-force-graph-3d": {
"version": "1.29.1",
"resolved": "https://registry.npmjs.org/react-force-graph-3d/-/react-force-graph-3d-1.29.1.tgz",
"integrity": "sha512-5Vp+PGpYnO+zLwgK2NvNqdXHvsWLrFzpDfJW1vUA1twjo9SPvXqfUYQrnRmAbD+K2tOxkZw1BkbH31l5b4TWHg==",
"license": "MIT",
"dependencies": {
"3d-force-graph": "^1.79",
"prop-types": "15",
"react-kapsule": "^2.5"
},
"engines": {
"node": ">=12"
},
"peerDependencies": {
"react": "*"
}
},
"node_modules/react-is": {
"version": "19.2.4",
"resolved": "https://registry.npmjs.org/react-is/-/react-is-19.2.4.tgz",
@@ -4127,6 +4372,21 @@
"license": "MIT",
"peer": true
},
"node_modules/react-kapsule": {
"version": "2.5.7",
"resolved": "https://registry.npmjs.org/react-kapsule/-/react-kapsule-2.5.7.tgz",
"integrity": "sha512-kifAF4ZPD77qZKc4CKLmozq6GY1sBzPEJTIJb0wWFK6HsePJatK3jXplZn2eeAt3x67CDozgi7/rO8fNQ/AL7A==",
"license": "MIT",
"dependencies": {
"jerrypick": "^1.1.1"
},
"engines": {
"node": ">=12"
},
"peerDependencies": {
"react": ">=16.13.1"
}
},
"node_modules/react-redux": {
"version": "9.2.0",
"resolved": "https://registry.npmjs.org/react-redux/-/react-redux-9.2.0.tgz",
@@ -4413,12 +4673,67 @@
"integrity": "sha512-05PUHKSNE8ou2dwIxTngl4EzcnsCDZGJ/iCLtDflR/SHB/ny14rXc+qU5P4mG9JkusiV7EivzY9Mhm55AzAvCg==",
"license": "MIT"
},
"node_modules/three": {
"version": "0.182.0",
"resolved": "https://registry.npmjs.org/three/-/three-0.182.0.tgz",
"integrity": "sha512-GbHabT+Irv+ihI1/f5kIIsZ+Ef9Sl5A1Y7imvS5RQjWgtTPfPnZ43JmlYI7NtCRDK9zir20lQpfg8/9Yd02OvQ==",
"license": "MIT"
},
"node_modules/three-forcegraph": {
"version": "1.43.1",
"resolved": "https://registry.npmjs.org/three-forcegraph/-/three-forcegraph-1.43.1.tgz",
"integrity": "sha512-lQnYPLvR31gb91mF5xHhU0jPHJgBPw9QB23R6poCk8Tgvz8sQtq7wTxwClcPdfKCBbHXsb7FSqK06Osiu1kQ5A==",
"license": "MIT",
"dependencies": {
"accessor-fn": "1",
"d3-array": "1 - 3",
"d3-force-3d": "2 - 3",
"d3-scale": "1 - 4",
"d3-scale-chromatic": "1 - 3",
"data-bind-mapper": "1",
"kapsule": "^1.16",
"ngraph.forcelayout": "3",
"ngraph.graph": "20",
"tinycolor2": "1"
},
"engines": {
"node": ">=12"
},
"peerDependencies": {
"three": ">=0.118.3"
}
},
"node_modules/three-render-objects": {
"version": "1.40.4",
"resolved": "https://registry.npmjs.org/three-render-objects/-/three-render-objects-1.40.4.tgz",
"integrity": "sha512-Ukpu1pei3L5r809izvjsZxwuRcYLiyn6Uvy3lZ9bpMTdvj3i6PeX6w++/hs2ZS3KnEzGjb6YvTvh4UQuwHTDJg==",
"license": "MIT",
"dependencies": {
"@tweenjs/tween.js": "18 - 25",
"accessor-fn": "1",
"float-tooltip": "^1.7",
"kapsule": "^1.16",
"polished": "4"
},
"engines": {
"node": ">=12"
},
"peerDependencies": {
"three": ">=0.168"
}
},
"node_modules/tiny-invariant": {
"version": "1.3.3",
"resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz",
"integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==",
"license": "MIT"
},
"node_modules/tinycolor2": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/tinycolor2/-/tinycolor2-1.6.0.tgz",
"integrity": "sha512-XPaBkWQJdsf3pLKJV9p4qN/S+fm2Oj8AIPo1BTUhg5oxkvm9+SVEGFdhyOz7tTdUTfvxMiAs4sp6/eZO2Ew+pw==",
"license": "MIT"
},
"node_modules/tinyglobby": {
"version": "0.2.15",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",

View File

@@ -17,6 +17,7 @@
"headlessui": "^0.0.0",
"react": "^19.2.0",
"react-dom": "^19.2.0",
"react-force-graph-3d": "^1.29.1",
"react-router-dom": "^7.13.0",
"recharts": "^3.7.0"
},

View File

@@ -0,0 +1,61 @@
import ForceGraph3D from "react-force-graph-3d";
import {
type UserAnalysisResponse,
type InteractionGraph
} from '../types/ApiTypes';
import StatsStyling from "../styles/stats_styling";
const styles = StatsStyling;
/**
 * Convert the API's nested interaction map into force-graph nodes/links.
 * Low-value (< 2) links and anything touching "[deleted]" are pruned,
 * along with any nodes left unconnected after pruning.
 */
function ApiToGraphData(apiData: InteractionGraph) {
  // One node per username, one directed link per (source -> target) pair.
  const nodes = Object.keys(apiData).map((username) => ({ id: username }));
  const links = Object.entries(apiData).flatMap(([source, targets]) =>
    Object.entries(targets).map(([target, count]) => ({
      source,
      target,
      value: count,
    }))
  );
  // Drop low-value and deleted interactions to reduce clutter.
  const keptLinks = links.filter(
    ({ source, target, value }) =>
      value >= 2 && source !== "[deleted]" && target !== "[deleted]"
  );
  // Prune nodes that are no longer connected after link filtering.
  const connected = new Set(keptLinks.flatMap((link) => [link.source, link.target]));
  return {
    nodes: nodes.filter((node) => connected.has(node.id)),
    links: keptLinks,
  };
}
const UserStats = (props: { data: UserAnalysisResponse }) => {
const graphData = ApiToGraphData(props.data.interaction_graph);
return (
<div style={styles.page}>
<h2 style={styles.sectionTitle}>User Interaction Graph</h2>
<p style={styles.sectionSubtitle}>
This graph visualizes interactions between users based on comments and replies.
Nodes represent users, and edges represent interactions (e.g., comments or replies) between them.
</p>
<div>
<ForceGraph3D
graphData={graphData}
nodeAutoColorBy="id"
linkDirectionalParticles={2}
linkDirectionalParticleSpeed={0.005}
linkWidth={(link) => Math.sqrt(link.value)}
nodeLabel={(node) => `${node.id}`}
/>
</div>
</div>
);
}
export default UserStats;

View File

@@ -3,6 +3,7 @@ import axios from "axios";
import StatsStyling from "../styles/stats_styling";
import SummaryStats from "../components/SummaryStats";
import EmotionalStats from "../components/EmotionalStats";
import InteractionStats from "../components/UserStats";
import {
type SummaryResponse,
@@ -16,7 +17,7 @@ const styles = StatsStyling;
const StatPage = () => {
const [error, setError] = useState('');
const [loading, setLoading] = useState(false);
const [activeView, setActiveView] = useState<"summary" | "emotional">("summary");
const [activeView, setActiveView] = useState<"summary" | "emotional" | "interaction">("summary");
const [userData, setUserData] = useState<UserAnalysisResponse | null>(null);
const [timeData, setTimeData] = useState<TimeAnalysisResponse | null>(null);
@@ -133,6 +134,13 @@ return (
>
Emotional
</button>
<button
onClick={() => setActiveView("interaction")}
style={activeView === "interaction" ? styles.buttonPrimary : styles.buttonSecondary}
>
Interaction
</button>
</div>
{activeView === "summary" && (
@@ -154,6 +162,10 @@ return (
</div>
)}
{activeView === "interaction" && userData && (
<InteractionStats data={userData} />
)}
</div>
);
}

View File

@@ -35,9 +35,12 @@ type User = {
vocab?: Vocab | null;
};
type InteractionGraph = Record<string, Record<string, number>>;
type UserAnalysisResponse = {
top_users: TopUser[];
users: User[];
interaction_graph: InteractionGraph;
};
// Time Analysis
@@ -89,6 +92,7 @@ export type {
TopUser,
Vocab,
User,
InteractionGraph,
UserAnalysisResponse,
FrequencyWord,
AverageEmotionByTopic,

View File

@@ -0,0 +1,41 @@
import pandas as pd
class EmotionalAnalysis:
    """Aggregate emotion-score statistics over a posts/comments DataFrame."""

    def __init__(self, df: pd.DataFrame):
        # df is expected to carry per-row emotion_* score columns and a "topic" column.
        self.df = df

    def avg_emotion_by_topic(self) -> list:
        """Return per-topic mean emotion scores plus the row count per topic.

        Rows whose topic is "Misc" (the low-confidence bucket) are excluded,
        as are the neutral/surprise emotion columns.

        Returns:
            A list of record dicts, one per topic, with the mean of each
            emotion column and "n" (number of rows for that topic).
        """
        emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
        emotion_cols = [
            col for col in self.df.columns
            if col.startswith("emotion_") and col not in emotion_exclusions
        ]
        # Compute the non-Misc subset once instead of filtering twice.
        topical = self.df[self.df["topic"] != "Misc"]
        counts = topical.groupby("topic").size().rename("n")
        avg_emotion_by_topic = (
            topical.groupby("topic")[emotion_cols]
            .mean()
            .reset_index()
            .merge(counts, on="topic")
        )
        return avg_emotion_by_topic.to_dict(orient='records')

View File

@@ -0,0 +1,208 @@
import pandas as pd
import re
from collections import Counter
class InteractionAnalysis:
    def __init__(self, df: pd.DataFrame, word_exclusions: set[str]):
        """Store the dataset and the stop-word set used by tokenization."""
        self.df = df
        self.word_exclusions = word_exclusions
def _tokenize(self, text: str):
tokens = re.findall(r"\b[a-z]{3,}\b", text)
return [t for t in tokens if t not in self.word_exclusions]
def _vocab_richness_per_user(self, min_words: int = 20, top_most_used_words: int = 100) -> list:
df = self.df.copy()
df["content"] = df["content"].fillna("").astype(str).str.lower()
df["tokens"] = df["content"].apply(self._tokenize)
rows = []
for author, group in df.groupby("author"):
all_tokens = [t for tokens in group["tokens"] for t in tokens]
total_words = len(all_tokens)
unique_words = len(set(all_tokens))
events = len(group)
# Min amount of words for a user, any less than this might give weird results
if total_words < min_words:
continue
# 100% = they never reused a word (excluding stop words)
vocab_richness = unique_words / total_words
avg_words = total_words / max(events, 1)
counts = Counter(all_tokens)
top_words = [
{"word": w, "count": int(c)}
for w, c in counts.most_common(top_most_used_words)
]
rows.append({
"author": author,
"events": int(events),
"total_words": int(total_words),
"unique_words": int(unique_words),
"vocab_richness": round(vocab_richness, 3),
"avg_words_per_event": round(avg_words, 2),
"top_words": top_words
})
rows = sorted(rows, key=lambda x: x["vocab_richness"], reverse=True)
return rows
def top_users(self) -> list:
counts = (
self.df.groupby(["author", "source"])
.size()
.sort_values(ascending=False)
)
top_users = [
{"author": author, "source": source, "count": int(count)}
for (author, source), count in counts.items()
]
return top_users
def per_user_analysis(self) -> dict:
per_user = (
self.df.groupby(["author", "type"])
.size()
.unstack(fill_value=0)
)
# ensure columns always exist
for col in ("post", "comment"):
if col not in per_user.columns:
per_user[col] = 0
per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(0, 1)
per_user["comment_share"] = per_user["comment"] / (per_user["post"] + per_user["comment"]).replace(0, 1)
per_user = per_user.sort_values("comment_post_ratio", ascending=True)
per_user_records = per_user.reset_index().to_dict(orient="records")
vocab_rows = self._vocab_richness_per_user()
vocab_by_author = {row["author"]: row for row in vocab_rows}
# merge vocab richness + per_user information
merged_users = []
for row in per_user_records:
author = row["author"]
merged_users.append({
"author": author,
"post": int(row.get("post", 0)),
"comment": int(row.get("comment", 0)),
"comment_post_ratio": float(row.get("comment_post_ratio", 0)),
"comment_share": float(row.get("comment_share", 0)),
"vocab": vocab_by_author.get(author)
})
merged_users.sort(key=lambda u: u["comment_post_ratio"])
return merged_users
def interaction_graph(self):
interactions = {a: {} for a in self.df["author"].dropna().unique()}
# reply_to refers to the comment id, this allows us to map comment ids to usernames
id_to_author = self.df.set_index("id")["author"].to_dict()
for _, row in self.df.iterrows():
a = row["author"]
reply_id = row["reply_to"]
if pd.isna(a) or pd.isna(reply_id) or reply_id == "":
continue
b = id_to_author.get(reply_id)
if b is None or a == b:
continue
interactions[a][b] = interactions[a].get(b, 0) + 1
return interactions
def average_thread_depth(self):
depths = []
id_to_reply = self.df.set_index("id")["reply_to"].to_dict()
for _, row in self.df.iterrows():
depth = 0
current_id = row["id"]
while True:
reply_to = id_to_reply.get(current_id)
if pd.isna(reply_to) or reply_to == "":
break
depth += 1
current_id = reply_to
depths.append(depth)
if not depths:
return 0
return round(sum(depths) / len(depths), 2)
    def average_thread_length_by_emotion(self):
        """Average reply-chain length grouped by each row's dominant emotion.

        Walks reply_to links upward from every row (memoizing partial chain
        lengths), then buckets each row's chain length under its
        highest-scoring emotion column (neutral/surprise excluded).
        Returns {emotion_col: mean_length} rounded to 2 decimals.
        """
        emotion_exclusions = {"emotion_neutral", "emotion_surprise"}
        emotion_cols = [
            c for c in self.df.columns
            if c.startswith("emotion_") and c not in emotion_exclusions
        ]
        id_to_reply = self.df.set_index("id")["reply_to"].to_dict()
        # Memoize chain lengths so shared ancestors are walked only once.
        length_cache = {}
        def thread_length_from(start_id):
            # Number of messages from start_id up to the thread root (inclusive).
            if start_id in length_cache:
                return length_cache[start_id]
            seen = set()
            length = 1
            current = start_id
            while True:
                if current in seen:
                    # infinite loop shouldn't happen, but just in case
                    break
                seen.add(current)
                reply_to = id_to_reply.get(current)
                if reply_to is None or (isinstance(reply_to, float) and pd.isna(reply_to)) or reply_to == "":
                    break
                length += 1
                current = reply_to
                if current in length_cache:
                    # Reuse the cached suffix instead of walking it again.
                    length += (length_cache[current] - 1)
                    break
            length_cache[start_id] = length
            return length
        emotion_to_lengths = {}
        # Fill NaNs in emotion cols to avoid max() issues
        emo_df = self.df[["id"] + emotion_cols].copy()
        emo_df[emotion_cols] = emo_df[emotion_cols].fillna(0)
        for _, row in emo_df.iterrows():
            msg_id = row["id"]
            length = thread_length_from(msg_id)
            emotions = {c: row[c] for c in emotion_cols}
            # Dominant = highest-scoring non-excluded emotion for this row.
            dominant = max(emotions, key=emotions.get)
            emotion_to_lengths.setdefault(dominant, []).append(length)
        return {
            emotion: round(sum(lengths) / len(lengths), 2)
            for emotion, lengths in emotion_to_lengths.items()
        }

View File

@@ -0,0 +1,113 @@
import pandas as pd
import re
from collections import Counter
from itertools import islice
class LinguisticAnalysis:
    def __init__(self, df: pd.DataFrame, word_exclusions: set[str]):
        """Store the dataset and the stop-word set used by tokenization."""
        self.df = df
        self.word_exclusions = word_exclusions
def _tokenize(self, text: str):
tokens = re.findall(r"\b[a-z]{3,}\b", text)
return [t for t in tokens if t not in self.word_exclusions]
def _clean_text(self, text: str) -> str:
text = re.sub(r"http\S+", "", text) # remove URLs
text = re.sub(r"www\S+", "", text)
text = re.sub(r"&\w+;", "", text) # remove HTML entities
text = re.sub(r"\bamp\b", "", text) # remove stray amp
text = re.sub(r"\S+\.(jpg|jpeg|png|webp|gif)", "", text)
return text
def word_frequencies(self, limit: int = 100) -> dict:
texts = (
self.df["content"]
.dropna()
.astype(str)
.str.lower()
)
words = []
for text in texts:
tokens = re.findall(r"\b[a-z]{3,}\b", text)
words.extend(
w for w in tokens
if w not in self.word_exclusions
)
counts = Counter(words)
word_frequencies = (
pd.DataFrame(counts.items(), columns=["word", "count"])
.sort_values("count", ascending=False)
.head(limit)
.reset_index(drop=True)
)
return word_frequencies.to_dict(orient="records")
def ngrams(self, n=2, limit=100):
texts = self.df["content"].dropna().astype(str).apply(self._clean_text).str.lower()
all_ngrams = []
for text in texts:
tokens = re.findall(r"\b[a-z]{3,}\b", text)
# stop word removal causes strange behaviors in ngrams
#tokens = [w for w in tokens if w not in self.word_exclusions]
ngrams = zip(*(islice(tokens, i, None) for i in range(n)))
all_ngrams.extend([" ".join(ng) for ng in ngrams])
counts = Counter(all_ngrams)
return (
pd.DataFrame(counts.items(), columns=["ngram", "count"])
.sort_values("count", ascending=False)
.head(limit)
.to_dict(orient="records")
)
def identity_markers(self):
df = self.df.copy()
df["content"] = df["content"].fillna("").astype(str).str.lower()
in_group_words = {"we", "us", "our", "ourselves"}
out_group_words = {"they", "them", "their", "themselves"}
emotion_exclusions = [
"emotion_neutral",
"emotion_surprise"
]
emotion_cols = [
col for col in self.df.columns
if col.startswith("emotion_") and col not in emotion_exclusions
]
in_count = 0
out_count = 0
in_emotions = {e: 0 for e in emotion_cols}
out_emotions = {e: 0 for e in emotion_cols}
total = 0
for post in df:
text = post["content"]
tokens = re.findall(r"\b[a-z]{2,}\b", text)
total += len(tokens)
in_count += sum(t in in_group_words for t in tokens)
out_count += sum(t in out_group_words for t in tokens)
emotions = post[emotion_cols]
print(emotions)
return {
"in_group_usage": in_count,
"out_group_usage": out_count,
"in_group_ratio": round(in_count / max(total, 1), 5),
"out_group_ratio": round(out_count / max(total, 1), 5),
}

View File

@@ -9,6 +9,7 @@ from sentence_transformers import SentenceTransformer
class NLP:
_topic_models: dict[str, SentenceTransformer] = {}
_emotion_classifiers: dict[str, Any] = {}
_entity_recognizers: dict[str, Any] = {}
_topic_embedding_cache: dict[tuple[str, ...], np.ndarray] = {}
def __init__(
@@ -29,6 +30,9 @@ class NLP:
self.emotion_classifier = self._get_emotion_classifier(
self.device_str, self.pipeline_device
)
self.entity_recognizer = self._get_entity_recognizer(
self.device_str, self.pipeline_device
)
except RuntimeError as exc:
if self.use_cuda and "out of memory" in str(exc).lower():
torch.cuda.empty_cache()
@@ -86,6 +90,27 @@ class NLP:
)
cls._emotion_classifiers[device_str] = classifier
return classifier
    @classmethod
    def _get_entity_recognizer(cls, device_str: str, pipeline_device: int) -> Any:
        """Return a cached NER pipeline for *device_str*, creating it on first use.

        Cached per device in ``_entity_recognizers`` so repeated NLP
        instances on the same device share one model.
        """
        recognizer = cls._entity_recognizers.get(device_str)
        if recognizer is None:
            pipeline_kwargs = {
                "aggregation_strategy": "simple",  # merges subwords
                "device": pipeline_device,
            }
            if device_str == "cuda":
                # fp16 halves GPU memory use for inference on CUDA.
                pipeline_kwargs["dtype"] = torch.float16
            recognizer = pipeline(
                "token-classification",
                model="dslim/bert-base-NER",
                **pipeline_kwargs,
            )
            cls._entity_recognizers[device_str] = recognizer
        return recognizer
def _encode_with_backoff(
self, texts: list[str], initial_batch_size: int
@@ -129,6 +154,26 @@ class NLP:
continue
raise
    def _infer_entities_with_backoff(
        self, texts: list[str], initial_batch_size: int
    ) -> list[list[dict[str, Any]]]:
        """Run NER over *texts*, halving the batch size on CUDA OOM.

        Retries down to a floor batch size of 4; any other RuntimeError
        (or OOM when not on CUDA) is re-raised.
        """
        batch_size = initial_batch_size
        while True:
            try:
                return self.entity_recognizer(texts, batch_size=batch_size)
            except RuntimeError as exc:
                if (
                    self.use_cuda
                    and "out of memory" in str(exc).lower()
                    and batch_size > 4
                ):
                    # Shrink the batch and retry after freeing cached GPU memory.
                    batch_size = max(4, batch_size // 2)
                    torch.cuda.empty_cache()
                    continue
                raise
def add_emotion_cols(self) -> None:
texts = self.df[self.content_col].astype(str).str.slice(0, 512).tolist()
@@ -183,3 +228,51 @@ class NLP:
self.df.loc[self.df["topic_confidence"] < confidence_threshold, "topic"] = (
"Misc"
)
    def add_ner_cols(self, max_chars: int = 512) -> None:
        """Add per-row Named Entity Recognition columns to ``self.df``.

        Adds "entities" (list of {"text", "label"} dicts), "entity_counts"
        (label -> count dict), and one ``entity_<LABEL>`` count column per
        label seen anywhere in the dataset. Text is truncated to *max_chars*
        before inference.
        """
        texts = (
            self.df[self.content_col]
            .fillna("")
            .astype(str)
            .str.slice(0, max_chars)
            .tolist()
        )
        if not texts:
            # Empty dataset: still create the columns so callers can rely on them.
            self.df["entities"] = []
            self.df["entity_counts"] = []
            return
        results = self._infer_entities_with_backoff(texts, 32 if self.use_cuda else 8)
        entity_lists = []
        entity_count_dicts = []
        for row in results:
            entities = []
            counts = {}
            for ent in row:
                word = ent.get("word")
                label = ent.get("entity_group")
                # Skip malformed pipeline outputs (non-string word/label).
                if isinstance(word, str) and isinstance(label, str):
                    entities.append({"text": word, "label": label})
                    counts[label] = counts.get(label, 0) + 1
            entity_lists.append(entities)
            entity_count_dicts.append(counts)
        self.df["entities"] = entity_lists
        self.df["entity_counts"] = entity_count_dicts
        # Expand label counts into columns
        all_labels = set()
        for d in entity_count_dicts:
            all_labels.update(d.keys())
        for label in all_labels:
            col_name = f"entity_{label}"
            self.df[col_name] = [
                d.get(label, 0) for d in entity_count_dicts
            ]

View File

@@ -0,0 +1,70 @@
import pandas as pd
class TemporalAnalysis:
    def __init__(self, df: pd.DataFrame):
        """Store the dataset; expects "id", "type", "reply_to", and time columns."""
        self.df = df
def avg_reply_time_per_emotion(self) -> dict:
df = self.df.copy()
replies = df[
(df["type"] == "comment") &
(df["reply_to"].notna()) &
(df["reply_to"] != "")
]
id_to_time = df.set_index("id")["dt"].to_dict()
def compute_reply_time(row):
reply_id = row["reply_to"]
parent_time = id_to_time.get(reply_id)
if parent_time is None:
return None
return (row["dt"] - parent_time).total_seconds()
replies["reply_time"] = replies.apply(compute_reply_time, axis=1)
emotion_cols = [col for col in df.columns if col.startswith("emotion_") and col not in ("emotion_neutral", "emotion_surprise")]
replies["dominant_emotion"] = replies[emotion_cols].idxmax(axis=1)
grouped = (
replies
.groupby("dominant_emotion")["reply_time"]
.agg(["mean", "count"])
.reset_index()
)
return grouped.to_dict(orient="records")
def posts_per_day(self) -> dict:
per_day = (
self.df.groupby("date")
.size()
.reset_index(name="count")
)
return per_day.to_dict(orient="records")
def heatmap(self) -> dict:
weekday_order = [
"Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday", "Sunday"
]
self.df["weekday"] = pd.Categorical(
self.df["weekday"],
categories=weekday_order,
ordered=True
)
heatmap = (
self.df
.groupby(["weekday", "hour"], observed=True)
.size()
.unstack(fill_value=0)
.reindex(columns=range(24), fill_value=0)
)
heatmap.columns = heatmap.columns.map(str)
return heatmap.to_dict(orient="records")

View File

@@ -12,7 +12,7 @@ app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": "http://localhost:5173"}})
# Global State
posts_df = pd.read_json('posts.jsonl', lines=True)
posts_df = pd.read_json('small.jsonl', lines=True)
with open("topic_buckets.json", "r", encoding="utf-8") as f:
domain_topics = json.load(f)
stat_obj = StatGen(posts_df, domain_topics)
@@ -47,7 +47,7 @@ def get_dataset():
if stat_obj is None:
return jsonify({"error": "No data uploaded"}), 400
return jsonify(stat_obj.df.to_dict(orient="records")), 200
return stat_obj.df.to_json(orient="records"), 200, {"Content-Type": "application/json"}
@app.route('/stats/content', methods=['GET'])
def word_frequencies():
@@ -55,7 +55,7 @@ def word_frequencies():
return jsonify({"error": "No data uploaded"}), 400
try:
return jsonify(stat_obj.content_analysis()), 200
return jsonify(stat_obj.get_content_analysis()), 200
except ValueError as e:
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
except Exception as e:
@@ -80,7 +80,7 @@ def get_time_analysis():
return jsonify({"error": "No data uploaded"}), 400
try:
return jsonify(stat_obj.time_analysis()), 200
return jsonify(stat_obj.get_time_analysis()), 200
except ValueError as e:
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
except Exception as e:
@@ -93,13 +93,39 @@ def get_user_analysis():
return jsonify({"error": "No data uploaded"}), 400
try:
return jsonify(stat_obj.user_analysis()), 200
return jsonify(stat_obj.get_user_analysis()), 200
except ValueError as e:
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
except Exception as e:
print(traceback.format_exc())
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route("/stats/cultural", methods=["GET"])
def get_cultural_analysis():
if stat_obj is None:
return jsonify({"error": "No data uploaded"}), 400
try:
return jsonify(stat_obj.get_cultural_analysis()), 200
except ValueError as e:
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
except Exception as e:
print(traceback.format_exc())
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route("/stats/interaction", methods=["GET"])
def get_interaction_analysis():
if stat_obj is None:
return jsonify({"error": "No data uploaded"}), 400
try:
return jsonify(stat_obj.get_interactional_analysis()), 200
except ValueError as e:
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
except Exception as e:
print(traceback.format_exc())
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
@app.route('/filter/search', methods=["POST"])
def search_dataset():
if stat_obj is None:

View File

@@ -1,11 +1,13 @@
import pandas as pd
import re
import nltk
import datetime
import nltk
from nltk.corpus import stopwords
from collections import Counter
from server.nlp import NLP
from server.analysis.nlp import NLP
from server.analysis.temporal import TemporalAnalysis
from server.analysis.emotional import EmotionalAnalysis
from server.analysis.interactional import InteractionAnalysis
from server.analysis.linguistic import LinguisticAnalysis
DOMAIN_STOPWORDS = {
"www", "https", "http",
@@ -23,6 +25,7 @@ EXCLUDE_WORDS = set(stopwords.words('english')) | DOMAIN_STOPWORDS
class StatGen:
def __init__(self, df: pd.DataFrame, domain_topics: dict) -> None:
comments_df = df[["id", "comments"]].explode("comments")
comments_df = comments_df[comments_df["comments"].apply(lambda x: isinstance(x, dict))]
comments_df = pd.json_normalize(comments_df["comments"])
posts_df = df.drop(columns=["comments"])
@@ -35,9 +38,15 @@ class StatGen:
self.df = pd.concat([posts_df, comments_df])
self.df.drop(columns=["post_id"], inplace=True, errors="ignore")
self.nlp = NLP(self.df, "title", "content", domain_topics)
self._add_extra_cols(self.df)
self.temporal_analysis = TemporalAnalysis(self.df)
self.emotional_analysis = EmotionalAnalysis(self.df)
self.interaction_analysis = InteractionAnalysis(self.df, EXCLUDE_WORDS)
self.linguistic_analysis = LinguisticAnalysis(self.df, EXCLUDE_WORDS)
self.original_df = self.df.copy(deep=True)
## Private Methods
@@ -50,141 +59,52 @@ class StatGen:
self.nlp.add_emotion_cols()
self.nlp.add_topic_col()
def _tokenize(self, text: str) -> list:
    """Extract lowercase word tokens of 3+ letters, dropping stopwords."""
    return [
        token
        for token in re.findall(r"\b[a-z]{3,}\b", text)
        if token not in EXCLUDE_WORDS
    ]
def _vocab_richness_per_user(self, min_words: int = 20, top_most_used_words: int = 100) -> list:
    """Per-author vocabulary statistics, sorted by vocabulary richness (desc).

    Authors with fewer than ``min_words`` usable tokens are omitted: tiny
    samples make the unique/total ratio meaningless.
    """
    frame = self.df.copy()
    frame["content"] = frame["content"].fillna("").astype(str).str.lower()
    frame["tokens"] = frame["content"].apply(self._tokenize)
    stats = []
    for author, posts in frame.groupby("author"):
        tokens = [tok for toks in posts["tokens"] for tok in toks]
        n_tokens = len(tokens)
        if n_tokens < min_words:
            continue
        n_unique = len(set(tokens))
        n_events = len(posts)
        word_counts = Counter(tokens)
        stats.append({
            "author": author,
            "events": int(n_events),
            "total_words": int(n_tokens),
            "unique_words": int(n_unique),
            # 1.0 means the author never reused a (non-stop) word.
            "vocab_richness": round(n_unique / n_tokens, 3),
            "avg_words_per_event": round(n_tokens / max(n_events, 1), 2),
            "top_words": [
                {"word": word, "count": int(count)}
                for word, count in word_counts.most_common(top_most_used_words)
            ],
        })
    stats.sort(key=lambda row: row["vocab_richness"], reverse=True)
    return stats
self.nlp.add_ner_cols()
def _interaction_graph(self):
interactions = {a: {} for a in self.df["author"].dropna().unique()}
# reply_to refers to the comment id, this allows us to map comment ids to usernames
id_to_author = self.df.set_index("id")["author"].to_dict()
for _, row in self.df.iterrows():
a = row["author"]
reply_id = row["reply_to"]
if pd.isna(a) or pd.isna(reply_id) or reply_id == "":
continue
b = id_to_author.get(reply_id)
if b is None or a == b:
continue
interactions[a][b] = interactions[a].get(b, 0) + 1
return interactions
def _avg_reply_time_per_emotion(self):
df = self.df.copy()
replies = df[
(df["type"] == "comment") &
(df["reply_to"].notna()) &
(df["reply_to"] != "")
]
id_to_time = df.set_index("id")["dt"].to_dict()
def compute_reply_time(row):
reply_id = row["reply_to"]
parent_time = id_to_time.get(reply_id)
if parent_time is None:
return None
return (row["dt"] - parent_time).total_seconds()
replies["reply_time"] = replies.apply(compute_reply_time, axis=1)
emotion_cols = [col for col in df.columns if col.startswith("emotion_") and col not in ("emotion_neutral", "emotion_surprise")]
replies["dominant_emotion"] = replies[emotion_cols].idxmax(axis=1)
grouped = (
replies
.groupby("dominant_emotion")["reply_time"]
.agg(["mean", "count"])
.reset_index()
)
return grouped.to_dict(orient="records")
## Public
def time_analysis(self) -> pd.DataFrame:
per_day = (
self.df.groupby("date")
.size()
.reset_index(name="count")
)
weekday_order = [
"Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday", "Sunday"
]
self.df["weekday"] = pd.Categorical(
self.df["weekday"],
categories=weekday_order,
ordered=True
)
heatmap = (
self.df
.groupby(["weekday", "hour"], observed=True)
.size()
.unstack(fill_value=0)
.reindex(columns=range(24), fill_value=0)
)
heatmap.columns = heatmap.columns.map(str)
burst_index = per_day["count"].std() / max(per_day["count"].mean(), 1)
# topics over time
# emotions over time
def get_time_analysis(self) -> dict:
    """Time-based stats: events per day and the weekday/hour heatmap.

    The previous body mixed stale lines from the old implementation
    (undefined ``per_day``/``heatmap``/``burst_index``, duplicate dict keys,
    a missing comma); the temporal delegate is the single source now.
    The return annotation is corrected from ``pd.DataFrame`` to ``dict``.
    """
    return {
        "events_per_day": self.temporal_analysis.posts_per_day(),
        "weekday_hour_heatmap": self.temporal_analysis.heatmap(),
    }
# average topic duration
def get_content_analysis(self) -> dict:
    """Content-level stats: word/phrase frequencies plus emotion breakdowns."""
    stats = {}
    stats["word_frequencies"] = self.linguistic_analysis.word_frequencies()
    stats["common_two_phrases"] = self.linguistic_analysis.ngrams()
    stats["common_three_phrases"] = self.linguistic_analysis.ngrams(n=3)
    stats["average_emotion_by_topic"] = self.emotional_analysis.avg_emotion_by_topic()
    stats["reply_time_by_emotion"] = self.temporal_analysis.avg_reply_time_per_emotion()
    return stats
# average emotion per user
# average chain length
def get_user_analysis(self) -> dict:
    """Per-user activity stats plus the author-to-author interaction graph."""
    interactions = self.interaction_analysis
    return {
        "top_users": interactions.top_users(),
        "users": interactions.per_user_analysis(),
        "interaction_graph": interactions.interaction_graph(),
    }
# average / max thread depth
# high engagment threads based on volume
def get_interactional_analysis(self) -> dict:
    """Thread-structure stats: average depth and emotion-conditioned length."""
    stats = {}
    stats["average_thread_depth"] = self.interaction_analysis.average_thread_depth()
    stats["average_thread_length_by_emotion"] = (
        self.interaction_analysis.average_thread_length_by_emotion()
    )
    return stats
# detect community jargon
# in-group and out-group linguistic markers
def get_cultural_analysis(self) -> dict:
    """Community-culture stats (currently just identity markers)."""
    markers = self.linguistic_analysis.identity_markers()
    return {"identity_markers": markers}
def summary(self) -> dict:
@@ -206,122 +126,6 @@ class StatGen:
},
"sources": self.df["source"].dropna().unique().tolist()
}
# Aggregate content-level statistics:
#   - top `limit` non-stopword word frequencies across all content
#   - mean emotion scores per topic (neutral/surprise and "Misc" excluded)
#   - average reply latency grouped by dominant emotion
def content_analysis(self, limit: int = 100) -> dict:
texts = (
self.df["content"]
.dropna()
.astype(str)
.str.lower()
)
# Tokenize to lowercase words of 3+ letters, dropping stopwords.
words = []
for text in texts:
tokens = re.findall(r"\b[a-z]{3,}\b", text)
words.extend(
w for w in tokens
if w not in EXCLUDE_WORDS
)
counts = Counter(words)
word_frequencies = (
pd.DataFrame(counts.items(), columns=["word", "count"])
.sort_values("count", ascending=False)
.head(limit)
.reset_index(drop=True)
)
# Neutral/surprise are excluded from the per-topic emotion averages.
emotion_exclusions = [
"emotion_neutral",
"emotion_surprise"
]
emotion_cols = [
col for col in self.df.columns
if col.startswith("emotion_") and col not in emotion_exclusions
]
# NOTE(review): `counts` is reused here, shadowing the word Counter above —
# from this point on it holds per-topic event counts.
counts = (
self.df[
(self.df["topic"] != "Misc")
]
.groupby("topic")
.size()
.rename("n")
)
avg_emotion_by_topic = (
self.df[
(self.df["topic"] != "Misc")
]
.groupby("topic")[emotion_cols]
.mean()
.reset_index()
)
# Attach the per-topic event count as column "n".
avg_emotion_by_topic = avg_emotion_by_topic.merge(
counts,
on="topic"
)
return {
"word_frequencies": word_frequencies.to_dict(orient='records'),
"average_emotion_by_topic": avg_emotion_by_topic.to_dict(orient='records'),
"reply_time_by_emotion": self._avg_reply_time_per_emotion()
}
# Per-user statistics: top posters, comment/post ratios, vocabulary richness,
# and the author interaction graph.
def user_analysis(self) -> dict:
# Event counts per (author, source), most active first.
counts = (
self.df.groupby(["author", "source"])
.size()
.sort_values(ascending=False)
)
top_users = [
{"author": author, "source": source, "count": int(count)}
for (author, source), count in counts.items()
]
# Pivot to one row per author with "post" / "comment" count columns.
per_user = (
self.df.groupby(["author", "type"])
.size()
.unstack(fill_value=0)
)
# ensure columns always exist
for col in ("post", "comment"):
if col not in per_user.columns:
per_user[col] = 0
# replace(0, 1) guards the divisions against zero denominators.
per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(0, 1)
per_user["comment_share"] = per_user["comment"] / (per_user["post"] + per_user["comment"]).replace(0, 1)
per_user = per_user.sort_values("comment_post_ratio", ascending=True)
per_user_records = per_user.reset_index().to_dict(orient="records")
vocab_rows = self._vocab_richness_per_user()
vocab_by_author = {row["author"]: row for row in vocab_rows}
# merge vocab richness + per_user information
# "vocab" is None for authors filtered out by the min-word threshold.
merged_users = []
for row in per_user_records:
author = row["author"]
merged_users.append({
"author": author,
"post": int(row.get("post", 0)),
"comment": int(row.get("comment", 0)),
"comment_post_ratio": float(row.get("comment_post_ratio", 0)),
"comment_share": float(row.get("comment_share", 0)),
"vocab": vocab_by_author.get(author)
})
merged_users.sort(key=lambda u: u["comment_post_ratio"])
return {
"top_users": top_users,
"users": merged_users,
"interaction_graph": self._interaction_graph()
}
def search(self, search_query: str) -> dict:
self.df = self.df[