Compare commits
15 Commits
fb20c3ab1b
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 257eb80de7 | |||
| 3a23b1f0c8 | |||
| 8c76476cd3 | |||
| 397986dc89 | |||
| 04b7094036 | |||
| c11b4bb85b | |||
| 289f4254db | |||
| ed0dd8cdbc | |||
| 8fbf32b67c | |||
| d27ba3fca4 | |||
| 83010aee55 | |||
| 70b34036db | |||
| 563212c98e | |||
| 4f577abd4f | |||
| 7c1e069152 |
317
frontend/package-lock.json
generated
317
frontend/package-lock.json
generated
@@ -15,6 +15,7 @@
|
||||
"headlessui": "^0.0.0",
|
||||
"react": "^19.2.0",
|
||||
"react-dom": "^19.2.0",
|
||||
"react-force-graph-3d": "^1.29.1",
|
||||
"react-router-dom": "^7.13.0",
|
||||
"recharts": "^3.7.0"
|
||||
},
|
||||
@@ -267,6 +268,15 @@
|
||||
"@babel/core": "^7.0.0-0"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/runtime": {
|
||||
"version": "7.28.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.6.tgz",
|
||||
"integrity": "sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/template": {
|
||||
"version": "7.28.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz",
|
||||
@@ -2022,6 +2032,12 @@
|
||||
"url": "https://github.com/sponsors/tannerlinsley"
|
||||
}
|
||||
},
|
||||
"node_modules/@tweenjs/tween.js": {
|
||||
"version": "25.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@tweenjs/tween.js/-/tween.js-25.0.0.tgz",
|
||||
"integrity": "sha512-XKLA6syeBUaPzx4j3qwMqzzq+V4uo72BnlbOjmuljLrRqdsd3qnzvZZoxvMHZ23ndsRS4aufU6JOZYpCbU6T1A==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/babel__core": {
|
||||
"version": "7.20.5",
|
||||
"resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz",
|
||||
@@ -2488,6 +2504,31 @@
|
||||
"vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/3d-force-graph": {
|
||||
"version": "1.79.1",
|
||||
"resolved": "https://registry.npmjs.org/3d-force-graph/-/3d-force-graph-1.79.1.tgz",
|
||||
"integrity": "sha512-iscIVt4jWjJ11KEEswgOIOWk8Ew4EFKHRyERJXJ0ouycqzHCtWwb9E5imnxS5rYF1f1IESkFNAfB+h3EkU0Irw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"accessor-fn": "1",
|
||||
"kapsule": "^1.16",
|
||||
"three": ">=0.118 <1",
|
||||
"three-forcegraph": "1",
|
||||
"three-render-objects": "^1.35"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/accessor-fn": {
|
||||
"version": "1.5.3",
|
||||
"resolved": "https://registry.npmjs.org/accessor-fn/-/accessor-fn-1.5.3.tgz",
|
||||
"integrity": "sha512-rkAofCwe/FvYFUlMB0v0gWmhqtfAtV1IUkdPbfhTUyYniu5LrC0A0UJkTH0Jv3S8SvwkmfuAlY+mQIJATdocMA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/acorn": {
|
||||
"version": "8.15.0",
|
||||
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
|
||||
@@ -2793,6 +2834,12 @@
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-binarytree": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/d3-binarytree/-/d3-binarytree-1.0.2.tgz",
|
||||
"integrity": "sha512-cElUNH+sHu95L04m92pG73t2MEJXKu+GeKUN1TJkFsu93E5W8E9Sc3kHEGJKgenGvj19m6upSn2EunvMgMD2Yw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/d3-cloud": {
|
||||
"version": "1.2.8",
|
||||
"resolved": "https://registry.npmjs.org/d3-cloud/-/d3-cloud-1.2.8.tgz",
|
||||
@@ -2826,6 +2873,22 @@
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-force-3d": {
|
||||
"version": "3.0.6",
|
||||
"resolved": "https://registry.npmjs.org/d3-force-3d/-/d3-force-3d-3.0.6.tgz",
|
||||
"integrity": "sha512-4tsKHUPLOVkyfEffZo1v6sFHvGFwAIIjt/W8IThbp08DYAsXZck+2pSHEG5W1+gQgEvFLdZkYvmJAbRM2EzMnA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"d3-binarytree": "1",
|
||||
"d3-dispatch": "1 - 3",
|
||||
"d3-octree": "1",
|
||||
"d3-quadtree": "1 - 3",
|
||||
"d3-timer": "1 - 3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-format": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz",
|
||||
@@ -2847,6 +2910,12 @@
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-octree": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-octree/-/d3-octree-1.1.0.tgz",
|
||||
"integrity": "sha512-F8gPlqpP+HwRPMO/8uOu5wjH110+6q4cgJvgJT6vlpy3BEaDIKlTZrgHKZSp/i1InRpVfh4puY/kvL6MxK930A==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/d3-path": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/d3-path/-/d3-path-3.1.0.tgz",
|
||||
@@ -2856,6 +2925,15 @@
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-quadtree": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/d3-quadtree/-/d3-quadtree-3.0.1.tgz",
|
||||
"integrity": "sha512-04xDrxQTDTCFwP5H6hRhsRcb9xxv2RzkcsygFzmkSIOJy3PeRJP7sNk3VRIbKXcog561P9oU0/rVH6vDROAgUw==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/d3-scale": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz",
|
||||
@@ -2958,6 +3036,18 @@
|
||||
"d3-selection": "2 - 3"
|
||||
}
|
||||
},
|
||||
"node_modules/data-bind-mapper": {
|
||||
"version": "1.0.3",
|
||||
"resolved": "https://registry.npmjs.org/data-bind-mapper/-/data-bind-mapper-1.0.3.tgz",
|
||||
"integrity": "sha512-QmU3lyEnbENQPo0M1F9BMu4s6cqNNp8iJA+b/HP2sSb7pf3dxwF3+EP1eO69rwBfH9kFJ1apmzrtogAmVt2/Xw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"accessor-fn": "1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/debug": {
|
||||
"version": "4.4.3",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
|
||||
@@ -3419,6 +3509,20 @@
|
||||
"dev": true,
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/float-tooltip": {
|
||||
"version": "1.7.5",
|
||||
"resolved": "https://registry.npmjs.org/float-tooltip/-/float-tooltip-1.7.5.tgz",
|
||||
"integrity": "sha512-/kXzuDnnBqyyWyhDMH7+PfP8J/oXiAavGzcRxASOMRHFuReDtofizLLJsf7nnDLAfEaMW4pVWaXrAjtnglpEkg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"d3-selection": "2 - 3",
|
||||
"kapsule": "^1.16",
|
||||
"preact": "10"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/follow-redirects": {
|
||||
"version": "1.15.11",
|
||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz",
|
||||
@@ -3722,11 +3826,19 @@
|
||||
"dev": true,
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/jerrypick": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/jerrypick/-/jerrypick-1.1.2.tgz",
|
||||
"integrity": "sha512-YKnxXEekXKzhpf7CLYA0A+oDP8V0OhICNCr5lv96FvSsDEmrb0GKM776JgQvHTMjr7DTTPEVv/1Ciaw0uEWzBA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/js-tokens": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
||||
"integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/js-yaml": {
|
||||
@@ -3789,6 +3901,18 @@
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/kapsule": {
|
||||
"version": "1.16.3",
|
||||
"resolved": "https://registry.npmjs.org/kapsule/-/kapsule-1.16.3.tgz",
|
||||
"integrity": "sha512-4+5mNNf4vZDSwPhKprKwz3330iisPrb08JyMgbsdFrimBCKNHecua/WBwvVg3n7vwx0C1ARjfhwIpbrbd9n5wg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"lodash-es": "4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/keyv": {
|
||||
"version": "4.5.4",
|
||||
"resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz",
|
||||
@@ -3835,6 +3959,12 @@
|
||||
"integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash-es": {
|
||||
"version": "4.17.23",
|
||||
"resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.23.tgz",
|
||||
"integrity": "sha512-kVI48u3PZr38HdYz98UmfPnXl2DXrpdctLrFLCd3kOx1xUkOmpFPx7gCWWM5MPkL/fD8zb+Ph0QzjGFs4+hHWg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash.debounce": {
|
||||
"version": "4.0.8",
|
||||
"resolved": "https://registry.npmjs.org/lodash.debounce/-/lodash.debounce-4.0.8.tgz",
|
||||
@@ -3848,6 +3978,18 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/loose-envify": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
|
||||
"integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"js-tokens": "^3.0.0 || ^4.0.0"
|
||||
},
|
||||
"bin": {
|
||||
"loose-envify": "cli.js"
|
||||
}
|
||||
},
|
||||
"node_modules/lru-cache": {
|
||||
"version": "5.1.1",
|
||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz",
|
||||
@@ -3934,6 +4076,44 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ngraph.events": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/ngraph.events/-/ngraph.events-1.4.0.tgz",
|
||||
"integrity": "sha512-NeDGI4DSyjBNBRtA86222JoYietsmCXbs8CEB0dZ51Xeh4lhVl1y3wpWLumczvnha8sFQIW4E0vvVWwgmX2mGw==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/ngraph.forcelayout": {
|
||||
"version": "3.3.1",
|
||||
"resolved": "https://registry.npmjs.org/ngraph.forcelayout/-/ngraph.forcelayout-3.3.1.tgz",
|
||||
"integrity": "sha512-MKBuEh1wujyQHFTW57y5vd/uuEOK0XfXYxm3lC7kktjJLRdt/KEKEknyOlc6tjXflqBKEuYBBcu7Ax5VY+S6aw==",
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"ngraph.events": "^1.0.0",
|
||||
"ngraph.merge": "^1.0.0",
|
||||
"ngraph.random": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/ngraph.graph": {
|
||||
"version": "20.1.2",
|
||||
"resolved": "https://registry.npmjs.org/ngraph.graph/-/ngraph.graph-20.1.2.tgz",
|
||||
"integrity": "sha512-W/G3GBR3Y5UxMLHTUCPP9v+pbtpzwuAEIqP5oZV+9IwgxAIEZwh+Foc60iPc1idlnK7Zxu0p3puxAyNmDvBd0Q==",
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"ngraph.events": "^1.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/ngraph.merge": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/ngraph.merge/-/ngraph.merge-1.0.0.tgz",
|
||||
"integrity": "sha512-5J8YjGITUJeapsomtTALYsw7rFveYkM+lBj3QiYZ79EymQcuri65Nw3knQtFxQBU1r5iOaVRXrSwMENUPK62Vg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ngraph.random": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/ngraph.random/-/ngraph.random-1.2.0.tgz",
|
||||
"integrity": "sha512-4EUeAGbB2HWX9njd6bP6tciN6ByJfoaAvmVL9QTaZSeXrW46eNGA9GajiXiPBbvFqxUWFkEbyo6x5qsACUuVfA==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/node-releases": {
|
||||
"version": "2.0.27",
|
||||
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.27.tgz",
|
||||
@@ -3941,6 +4121,15 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/object-assign": {
|
||||
"version": "4.1.1",
|
||||
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
||||
"integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/optionator": {
|
||||
"version": "0.9.4",
|
||||
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
|
||||
@@ -4044,6 +4233,18 @@
|
||||
"url": "https://github.com/sponsors/jonschlinkert"
|
||||
}
|
||||
},
|
||||
"node_modules/polished": {
|
||||
"version": "4.3.1",
|
||||
"resolved": "https://registry.npmjs.org/polished/-/polished-4.3.1.tgz",
|
||||
"integrity": "sha512-OBatVyC/N7SCW/FaDHrSd+vn0o5cS855TOmYi4OkdWUMSJCET/xip//ch8xGUvtr3i44X9LVyWwQlRMTN3pwSA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/runtime": "^7.17.8"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/postcss": {
|
||||
"version": "8.5.6",
|
||||
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz",
|
||||
@@ -4073,6 +4274,16 @@
|
||||
"node": "^10 || ^12 || >=14"
|
||||
}
|
||||
},
|
||||
"node_modules/preact": {
|
||||
"version": "10.28.3",
|
||||
"resolved": "https://registry.npmjs.org/preact/-/preact-10.28.3.tgz",
|
||||
"integrity": "sha512-tCmoRkPQLpBeWzpmbhryairGnhW9tKV6c6gr/w+RhoRoKEJwsjzipwp//1oCpGPOchvSLaAPlpcJi9MwMmoPyA==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/preact"
|
||||
}
|
||||
},
|
||||
"node_modules/prelude-ls": {
|
||||
"version": "1.2.1",
|
||||
"resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz",
|
||||
@@ -4083,6 +4294,23 @@
|
||||
"node": ">= 0.8.0"
|
||||
}
|
||||
},
|
||||
"node_modules/prop-types": {
|
||||
"version": "15.8.1",
|
||||
"resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz",
|
||||
"integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"loose-envify": "^1.4.0",
|
||||
"object-assign": "^4.1.1",
|
||||
"react-is": "^16.13.1"
|
||||
}
|
||||
},
|
||||
"node_modules/prop-types/node_modules/react-is": {
|
||||
"version": "16.13.1",
|
||||
"resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
|
||||
"integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/proxy-from-env": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||
@@ -4120,6 +4348,23 @@
|
||||
"react": "^19.2.4"
|
||||
}
|
||||
},
|
||||
"node_modules/react-force-graph-3d": {
|
||||
"version": "1.29.1",
|
||||
"resolved": "https://registry.npmjs.org/react-force-graph-3d/-/react-force-graph-3d-1.29.1.tgz",
|
||||
"integrity": "sha512-5Vp+PGpYnO+zLwgK2NvNqdXHvsWLrFzpDfJW1vUA1twjo9SPvXqfUYQrnRmAbD+K2tOxkZw1BkbH31l5b4TWHg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"3d-force-graph": "^1.79",
|
||||
"prop-types": "15",
|
||||
"react-kapsule": "^2.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/react-is": {
|
||||
"version": "19.2.4",
|
||||
"resolved": "https://registry.npmjs.org/react-is/-/react-is-19.2.4.tgz",
|
||||
@@ -4127,6 +4372,21 @@
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/react-kapsule": {
|
||||
"version": "2.5.7",
|
||||
"resolved": "https://registry.npmjs.org/react-kapsule/-/react-kapsule-2.5.7.tgz",
|
||||
"integrity": "sha512-kifAF4ZPD77qZKc4CKLmozq6GY1sBzPEJTIJb0wWFK6HsePJatK3jXplZn2eeAt3x67CDozgi7/rO8fNQ/AL7A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"jerrypick": "^1.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": ">=16.13.1"
|
||||
}
|
||||
},
|
||||
"node_modules/react-redux": {
|
||||
"version": "9.2.0",
|
||||
"resolved": "https://registry.npmjs.org/react-redux/-/react-redux-9.2.0.tgz",
|
||||
@@ -4413,12 +4673,67 @@
|
||||
"integrity": "sha512-05PUHKSNE8ou2dwIxTngl4EzcnsCDZGJ/iCLtDflR/SHB/ny14rXc+qU5P4mG9JkusiV7EivzY9Mhm55AzAvCg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/three": {
|
||||
"version": "0.182.0",
|
||||
"resolved": "https://registry.npmjs.org/three/-/three-0.182.0.tgz",
|
||||
"integrity": "sha512-GbHabT+Irv+ihI1/f5kIIsZ+Ef9Sl5A1Y7imvS5RQjWgtTPfPnZ43JmlYI7NtCRDK9zir20lQpfg8/9Yd02OvQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/three-forcegraph": {
|
||||
"version": "1.43.1",
|
||||
"resolved": "https://registry.npmjs.org/three-forcegraph/-/three-forcegraph-1.43.1.tgz",
|
||||
"integrity": "sha512-lQnYPLvR31gb91mF5xHhU0jPHJgBPw9QB23R6poCk8Tgvz8sQtq7wTxwClcPdfKCBbHXsb7FSqK06Osiu1kQ5A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"accessor-fn": "1",
|
||||
"d3-array": "1 - 3",
|
||||
"d3-force-3d": "2 - 3",
|
||||
"d3-scale": "1 - 4",
|
||||
"d3-scale-chromatic": "1 - 3",
|
||||
"data-bind-mapper": "1",
|
||||
"kapsule": "^1.16",
|
||||
"ngraph.forcelayout": "3",
|
||||
"ngraph.graph": "20",
|
||||
"tinycolor2": "1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"three": ">=0.118.3"
|
||||
}
|
||||
},
|
||||
"node_modules/three-render-objects": {
|
||||
"version": "1.40.4",
|
||||
"resolved": "https://registry.npmjs.org/three-render-objects/-/three-render-objects-1.40.4.tgz",
|
||||
"integrity": "sha512-Ukpu1pei3L5r809izvjsZxwuRcYLiyn6Uvy3lZ9bpMTdvj3i6PeX6w++/hs2ZS3KnEzGjb6YvTvh4UQuwHTDJg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@tweenjs/tween.js": "18 - 25",
|
||||
"accessor-fn": "1",
|
||||
"float-tooltip": "^1.7",
|
||||
"kapsule": "^1.16",
|
||||
"polished": "4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"three": ">=0.168"
|
||||
}
|
||||
},
|
||||
"node_modules/tiny-invariant": {
|
||||
"version": "1.3.3",
|
||||
"resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz",
|
||||
"integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/tinycolor2": {
|
||||
"version": "1.6.0",
|
||||
"resolved": "https://registry.npmjs.org/tinycolor2/-/tinycolor2-1.6.0.tgz",
|
||||
"integrity": "sha512-XPaBkWQJdsf3pLKJV9p4qN/S+fm2Oj8AIPo1BTUhg5oxkvm9+SVEGFdhyOz7tTdUTfvxMiAs4sp6/eZO2Ew+pw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/tinyglobby": {
|
||||
"version": "0.2.15",
|
||||
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
"headlessui": "^0.0.0",
|
||||
"react": "^19.2.0",
|
||||
"react-dom": "^19.2.0",
|
||||
"react-force-graph-3d": "^1.29.1",
|
||||
"react-router-dom": "^7.13.0",
|
||||
"recharts": "^3.7.0"
|
||||
},
|
||||
|
||||
61
frontend/src/components/UserStats.tsx
Normal file
61
frontend/src/components/UserStats.tsx
Normal file
@@ -0,0 +1,61 @@
|
||||
import ForceGraph3D from "react-force-graph-3d";
|
||||
|
||||
import {
|
||||
type UserAnalysisResponse,
|
||||
type InteractionGraph
|
||||
} from '../types/ApiTypes';
|
||||
|
||||
import StatsStyling from "../styles/stats_styling";
|
||||
|
||||
const styles = StatsStyling;
|
||||
|
||||
function ApiToGraphData(apiData: InteractionGraph) {
|
||||
const nodes = Object.keys(apiData).map(username => ({ id: username }));
|
||||
const links = [];
|
||||
|
||||
for (const [source, targets] of Object.entries(apiData)) {
|
||||
for (const [target, count] of Object.entries(targets)) {
|
||||
links.push({ source, target, value: count });
|
||||
}
|
||||
}
|
||||
|
||||
// drop low-value and deleted interactions to reduce clutter
|
||||
const filteredLinks = links.filter(link =>
|
||||
link.value >= 2 &&
|
||||
link.source !== "[deleted]" &&
|
||||
link.target !== "[deleted]"
|
||||
);
|
||||
|
||||
// also filter out nodes that are no longer connected after link filtering
|
||||
const connectedNodeIds = new Set(filteredLinks.flatMap(link => [link.source, link.target]));
|
||||
const filteredNodes = nodes.filter(node => connectedNodeIds.has(node.id));
|
||||
|
||||
return { nodes: filteredNodes, links: filteredLinks};
|
||||
}
|
||||
|
||||
|
||||
// Renders the 3D user-interaction graph for the "Interaction" stats view.
// props.data is the full user-analysis API response; only its
// interaction_graph field is used here.
const UserStats = (props: { data: UserAnalysisResponse }) => {
  const graphData = ApiToGraphData(props.data.interaction_graph);

  return (
    <div style={styles.page}>
      <h2 style={styles.sectionTitle}>User Interaction Graph</h2>
      <p style={styles.sectionSubtitle}>
        This graph visualizes interactions between users based on comments and replies.
        Nodes represent users, and edges represent interactions (e.g., comments or replies) between them.
      </p>
      <div>
        {/* nodeAutoColorBy gives each username a stable colour; the
            particle props animate edge direction; link width scales with
            sqrt(count) so heavy pairs don't overwhelm the view. */}
        <ForceGraph3D
          graphData={graphData}
          nodeAutoColorBy="id"
          linkDirectionalParticles={2}
          linkDirectionalParticleSpeed={0.005}
          linkWidth={(link) => Math.sqrt(link.value)}
          nodeLabel={(node) => `${node.id}`}
        />
      </div>
    </div>
  );
}

export default UserStats;
|
||||
@@ -3,6 +3,7 @@ import axios from "axios";
|
||||
import StatsStyling from "../styles/stats_styling";
|
||||
import SummaryStats from "../components/SummaryStats";
|
||||
import EmotionalStats from "../components/EmotionalStats";
|
||||
import InteractionStats from "../components/UserStats";
|
||||
|
||||
import {
|
||||
type SummaryResponse,
|
||||
@@ -16,7 +17,7 @@ const styles = StatsStyling;
|
||||
const StatPage = () => {
|
||||
const [error, setError] = useState('');
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [activeView, setActiveView] = useState<"summary" | "emotional">("summary");
|
||||
const [activeView, setActiveView] = useState<"summary" | "emotional" | "interaction">("summary");
|
||||
|
||||
const [userData, setUserData] = useState<UserAnalysisResponse | null>(null);
|
||||
const [timeData, setTimeData] = useState<TimeAnalysisResponse | null>(null);
|
||||
@@ -133,6 +134,13 @@ return (
|
||||
>
|
||||
Emotional
|
||||
</button>
|
||||
|
||||
<button
|
||||
onClick={() => setActiveView("interaction")}
|
||||
style={activeView === "interaction" ? styles.buttonPrimary : styles.buttonSecondary}
|
||||
>
|
||||
Interaction
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{activeView === "summary" && (
|
||||
@@ -154,6 +162,10 @@ return (
|
||||
</div>
|
||||
)}
|
||||
|
||||
{activeView === "interaction" && userData && (
|
||||
<InteractionStats data={userData} />
|
||||
)}
|
||||
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -35,9 +35,12 @@ type User = {
|
||||
vocab?: Vocab | null;
|
||||
};
|
||||
|
||||
type InteractionGraph = Record<string, Record<string, number>>;
|
||||
|
||||
type UserAnalysisResponse = {
|
||||
top_users: TopUser[];
|
||||
users: User[];
|
||||
interaction_graph: InteractionGraph;
|
||||
};
|
||||
|
||||
// Time Analysis
|
||||
@@ -89,6 +92,7 @@ export type {
|
||||
TopUser,
|
||||
Vocab,
|
||||
User,
|
||||
InteractionGraph,
|
||||
UserAnalysisResponse,
|
||||
FrequencyWord,
|
||||
AverageEmotionByTopic,
|
||||
|
||||
41
server/analysis/emotional.py
Normal file
41
server/analysis/emotional.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import pandas as pd
|
||||
|
||||
class EmotionalAnalysis:
    """Aggregate per-topic emotion scores from a message DataFrame."""

    def __init__(self, df: pd.DataFrame):
        # df is expected to carry a "topic" column plus one "emotion_*"
        # score column per emotion.
        self.df = df

    def avg_emotion_by_topic(self) -> list:
        """Return per-topic mean emotion scores as a list of records.

        The "Misc" topic is excluded, as are the neutral and surprise
        emotion columns. Each record also carries "n", the number of rows
        contributing to that topic.

        Fixes vs. original: the non-"Misc" subset is filtered once instead
        of twice, and the return annotation is corrected (``to_dict
        (orient='records')`` returns a list, not a dict).
        """
        emotion_exclusions = {
            "emotion_neutral",
            "emotion_surprise",
        }

        emotion_cols = [
            col for col in self.df.columns
            if col.startswith("emotion_") and col not in emotion_exclusions
        ]

        # Filter once and reuse for both the row counts and the means.
        relevant = self.df[self.df["topic"] != "Misc"]

        counts = relevant.groupby("topic").size().rename("n")

        avg_emotion_by_topic = (
            relevant.groupby("topic")[emotion_cols]
            .mean()
            .reset_index()
            .merge(counts, on="topic")
        )

        return avg_emotion_by_topic.to_dict(orient='records')
|
||||
208
server/analysis/interactional.py
Normal file
208
server/analysis/interactional.py
Normal file
@@ -0,0 +1,208 @@
|
||||
import pandas as pd
|
||||
import re
|
||||
|
||||
from collections import Counter
|
||||
|
||||
class InteractionAnalysis:
    """Per-user and reply-thread analytics over a message DataFrame.

    Expected columns: "id", "author", "reply_to" (id of the message being
    replied to; "" or NaN for top-level posts), "content", "type"
    ("post"/"comment"), "source", plus optional "emotion_*" score columns.
    """

    def __init__(self, df: pd.DataFrame, word_exclusions: set[str]):
        self.df = df
        # Stop words (and similar) dropped during tokenization.
        self.word_exclusions = word_exclusions

    def _tokenize(self, text: str):
        """Lowercase alphabetic tokens of 3+ chars, minus excluded words."""
        tokens = re.findall(r"\b[a-z]{3,}\b", text)
        return [t for t in tokens if t not in self.word_exclusions]

    def _vocab_richness_per_user(self, min_words: int = 20, top_most_used_words: int = 100) -> list:
        """Per-author vocabulary stats, sorted by vocab richness (desc).

        Authors with fewer than `min_words` tokens are skipped, since tiny
        samples give unreliable richness figures.
        """
        df = self.df.copy()
        df["content"] = df["content"].fillna("").astype(str).str.lower()
        df["tokens"] = df["content"].apply(self._tokenize)

        rows = []
        for author, group in df.groupby("author"):
            all_tokens = [t for tokens in group["tokens"] for t in tokens]

            total_words = len(all_tokens)
            unique_words = len(set(all_tokens))
            events = len(group)

            # Min amount of words for a user, any less than this might give weird results
            if total_words < min_words:
                continue

            # 100% = they never reused a word (excluding stop words)
            vocab_richness = unique_words / total_words
            avg_words = total_words / max(events, 1)

            counts = Counter(all_tokens)
            top_words = [
                {"word": w, "count": int(c)}
                for w, c in counts.most_common(top_most_used_words)
            ]

            rows.append({
                "author": author,
                "events": int(events),
                "total_words": int(total_words),
                "unique_words": int(unique_words),
                "vocab_richness": round(vocab_richness, 3),
                "avg_words_per_event": round(avg_words, 2),
                "top_words": top_words
            })

        rows = sorted(rows, key=lambda x: x["vocab_richness"], reverse=True)

        return rows

    def top_users(self) -> list:
        """(author, source) pairs ranked by message count, descending."""
        counts = (
            self.df.groupby(["author", "source"])
            .size()
            .sort_values(ascending=False)
        )

        top_users = [
            {"author": author, "source": source, "count": int(count)}
            for (author, source), count in counts.items()
        ]

        return top_users

    def per_user_analysis(self) -> list:
        """Per-author post/comment stats merged with vocabulary stats.

        Returns a list of records sorted by comment/post ratio (ascending);
        "vocab" is None for authors skipped by _vocab_richness_per_user.
        (Annotation fixed: this returns a list, not a dict.)
        """
        per_user = (
            self.df.groupby(["author", "type"])
            .size()
            .unstack(fill_value=0)
        )

        # ensure columns always exist
        for col in ("post", "comment"):
            if col not in per_user.columns:
                per_user[col] = 0

        # replace(0, 1) avoids division by zero for comment-only authors
        per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(0, 1)
        per_user["comment_share"] = per_user["comment"] / (per_user["post"] + per_user["comment"]).replace(0, 1)
        per_user = per_user.sort_values("comment_post_ratio", ascending=True)
        per_user_records = per_user.reset_index().to_dict(orient="records")

        vocab_rows = self._vocab_richness_per_user()
        vocab_by_author = {row["author"]: row for row in vocab_rows}

        # merge vocab richness + per_user information
        merged_users = []
        for row in per_user_records:
            author = row["author"]
            merged_users.append({
                "author": author,
                "post": int(row.get("post", 0)),
                "comment": int(row.get("comment", 0)),
                "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
                "comment_share": float(row.get("comment_share", 0)),
                "vocab": vocab_by_author.get(author)
            })

        merged_users.sort(key=lambda u: u["comment_post_ratio"])

        return merged_users

    def interaction_graph(self):
        """Directed reply graph: {author: {replied-to author: count}}."""
        interactions = {a: {} for a in self.df["author"].dropna().unique()}

        # reply_to refers to the comment id, this allows us to map comment ids to usernames
        id_to_author = self.df.set_index("id")["author"].to_dict()

        for _, row in self.df.iterrows():
            a = row["author"]
            reply_id = row["reply_to"]

            if pd.isna(a) or pd.isna(reply_id) or reply_id == "":
                continue

            b = id_to_author.get(reply_id)
            if b is None or a == b:
                continue

            interactions[a][b] = interactions[a].get(b, 0) + 1

        return interactions

    def average_thread_depth(self):
        """Mean number of reply hops from each message up to its thread root.

        Bug fix: the chain walk now tracks visited ids, so a reply cycle in
        the data no longer hangs in an infinite loop (the same guard
        average_thread_length_by_emotion already had).
        """
        depths = []
        id_to_reply = self.df.set_index("id")["reply_to"].to_dict()
        for _, row in self.df.iterrows():
            depth = 0
            current_id = row["id"]
            seen = set()

            while True:
                if current_id in seen:
                    # cycle in reply chain — stop rather than loop forever
                    break
                seen.add(current_id)

                reply_to = id_to_reply.get(current_id)
                if pd.isna(reply_to) or reply_to == "":
                    break

                depth += 1
                current_id = reply_to

            depths.append(depth)

        if not depths:
            return 0

        return round(sum(depths) / len(depths), 2)

    def average_thread_length_by_emotion(self):
        """Average reply-chain length grouped by each message's dominant emotion.

        A message's chain length is 1 plus the number of ancestors reachable
        via reply_to; lengths are memoized in length_cache. Neutral and
        surprise columns are excluded when picking the dominant emotion.
        """
        emotion_exclusions = {"emotion_neutral", "emotion_surprise"}

        emotion_cols = [
            c for c in self.df.columns
            if c.startswith("emotion_") and c not in emotion_exclusions
        ]

        id_to_reply = self.df.set_index("id")["reply_to"].to_dict()
        length_cache = {}

        def thread_length_from(start_id):
            # Walk up the reply chain, reusing any cached suffix length.
            if start_id in length_cache:
                return length_cache[start_id]

            seen = set()
            length = 1
            current = start_id

            while True:
                if current in seen:
                    # infinite loop shouldn't happen, but just in case
                    break
                seen.add(current)

                reply_to = id_to_reply.get(current)

                if reply_to is None or (isinstance(reply_to, float) and pd.isna(reply_to)) or reply_to == "":
                    break

                length += 1
                current = reply_to

                if current in length_cache:
                    length += (length_cache[current] - 1)
                    break

            length_cache[start_id] = length
            return length

        emotion_to_lengths = {}

        # Fill NaNs in emotion cols to avoid max() issues
        emo_df = self.df[["id"] + emotion_cols].copy()
        emo_df[emotion_cols] = emo_df[emotion_cols].fillna(0)

        for _, row in emo_df.iterrows():
            msg_id = row["id"]
            length = thread_length_from(msg_id)

            emotions = {c: row[c] for c in emotion_cols}
            dominant = max(emotions, key=emotions.get)

            emotion_to_lengths.setdefault(dominant, []).append(length)

        return {
            emotion: round(sum(lengths) / len(lengths), 2)
            for emotion, lengths in emotion_to_lengths.items()
        }
|
||||
113
server/analysis/linguistic.py
Normal file
113
server/analysis/linguistic.py
Normal file
@@ -0,0 +1,113 @@
|
||||
import pandas as pd
|
||||
import re
|
||||
|
||||
from collections import Counter
|
||||
from itertools import islice
|
||||
|
||||
class LinguisticAnalysis:
|
||||
def __init__(self, df: pd.DataFrame, word_exclusions: set[str]):
    # Message DataFrame; the text analysed lives in its "content" column.
    self.df = df
    # Stop words (and similar) dropped during tokenization.
    self.word_exclusions = word_exclusions
|
||||
|
||||
def _tokenize(self, text: str):
|
||||
tokens = re.findall(r"\b[a-z]{3,}\b", text)
|
||||
return [t for t in tokens if t not in self.word_exclusions]
|
||||
|
||||
def _clean_text(self, text: str) -> str:
|
||||
text = re.sub(r"http\S+", "", text) # remove URLs
|
||||
text = re.sub(r"www\S+", "", text)
|
||||
text = re.sub(r"&\w+;", "", text) # remove HTML entities
|
||||
text = re.sub(r"\bamp\b", "", text) # remove stray amp
|
||||
text = re.sub(r"\S+\.(jpg|jpeg|png|webp|gif)", "", text)
|
||||
return text
|
||||
|
||||
def word_frequencies(self, limit: int = 100) -> dict:
|
||||
texts = (
|
||||
self.df["content"]
|
||||
.dropna()
|
||||
.astype(str)
|
||||
.str.lower()
|
||||
)
|
||||
|
||||
words = []
|
||||
for text in texts:
|
||||
tokens = re.findall(r"\b[a-z]{3,}\b", text)
|
||||
words.extend(
|
||||
w for w in tokens
|
||||
if w not in self.word_exclusions
|
||||
)
|
||||
|
||||
|
||||
counts = Counter(words)
|
||||
|
||||
word_frequencies = (
|
||||
pd.DataFrame(counts.items(), columns=["word", "count"])
|
||||
.sort_values("count", ascending=False)
|
||||
.head(limit)
|
||||
.reset_index(drop=True)
|
||||
)
|
||||
|
||||
return word_frequencies.to_dict(orient="records")
|
||||
|
||||
def ngrams(self, n=2, limit=100):
|
||||
texts = self.df["content"].dropna().astype(str).apply(self._clean_text).str.lower()
|
||||
all_ngrams = []
|
||||
|
||||
for text in texts:
|
||||
tokens = re.findall(r"\b[a-z]{3,}\b", text)
|
||||
|
||||
# stop word removal causes strange behaviors in ngrams
|
||||
#tokens = [w for w in tokens if w not in self.word_exclusions]
|
||||
|
||||
ngrams = zip(*(islice(tokens, i, None) for i in range(n)))
|
||||
all_ngrams.extend([" ".join(ng) for ng in ngrams])
|
||||
|
||||
counts = Counter(all_ngrams)
|
||||
|
||||
return (
|
||||
pd.DataFrame(counts.items(), columns=["ngram", "count"])
|
||||
.sort_values("count", ascending=False)
|
||||
.head(limit)
|
||||
.to_dict(orient="records")
|
||||
)
|
||||
|
||||
def identity_markers(self):
|
||||
df = self.df.copy()
|
||||
df["content"] = df["content"].fillna("").astype(str).str.lower()
|
||||
|
||||
in_group_words = {"we", "us", "our", "ourselves"}
|
||||
out_group_words = {"they", "them", "their", "themselves"}
|
||||
|
||||
emotion_exclusions = [
|
||||
"emotion_neutral",
|
||||
"emotion_surprise"
|
||||
]
|
||||
|
||||
emotion_cols = [
|
||||
col for col in self.df.columns
|
||||
if col.startswith("emotion_") and col not in emotion_exclusions
|
||||
]
|
||||
in_count = 0
|
||||
out_count = 0
|
||||
in_emotions = {e: 0 for e in emotion_cols}
|
||||
out_emotions = {e: 0 for e in emotion_cols}
|
||||
total = 0
|
||||
|
||||
for post in df:
|
||||
text = post["content"]
|
||||
tokens = re.findall(r"\b[a-z]{2,}\b", text)
|
||||
total += len(tokens)
|
||||
in_count += sum(t in in_group_words for t in tokens)
|
||||
out_count += sum(t in out_group_words for t in tokens)
|
||||
|
||||
emotions = post[emotion_cols]
|
||||
print(emotions)
|
||||
|
||||
|
||||
|
||||
return {
|
||||
"in_group_usage": in_count,
|
||||
"out_group_usage": out_count,
|
||||
"in_group_ratio": round(in_count / max(total, 1), 5),
|
||||
"out_group_ratio": round(out_count / max(total, 1), 5),
|
||||
}
|
||||
@@ -9,6 +9,7 @@ from sentence_transformers import SentenceTransformer
|
||||
class NLP:
|
||||
_topic_models: dict[str, SentenceTransformer] = {}
|
||||
_emotion_classifiers: dict[str, Any] = {}
|
||||
_entity_recognizers: dict[str, Any] = {}
|
||||
_topic_embedding_cache: dict[tuple[str, ...], np.ndarray] = {}
|
||||
|
||||
def __init__(
|
||||
@@ -29,6 +30,9 @@ class NLP:
|
||||
self.emotion_classifier = self._get_emotion_classifier(
|
||||
self.device_str, self.pipeline_device
|
||||
)
|
||||
self.entity_recognizer = self._get_entity_recognizer(
|
||||
self.device_str, self.pipeline_device
|
||||
)
|
||||
except RuntimeError as exc:
|
||||
if self.use_cuda and "out of memory" in str(exc).lower():
|
||||
torch.cuda.empty_cache()
|
||||
@@ -86,6 +90,27 @@ class NLP:
|
||||
)
|
||||
cls._emotion_classifiers[device_str] = classifier
|
||||
return classifier
|
||||
|
||||
    @classmethod
    def _get_entity_recognizer(cls, device_str: str, pipeline_device: int) -> Any:
        """Return a cached HF token-classification pipeline for *device_str*.

        Builds the NER pipeline lazily, once per device string, and memoizes
        it in ``cls._entity_recognizers`` so that repeated NLP instances on
        the same device share one model.
        """
        recognizer = cls._entity_recognizers.get(device_str)
        if recognizer is None:
            pipeline_kwargs = {
                "aggregation_strategy": "simple",  # merges subwords
                "device": pipeline_device,
            }

            if device_str == "cuda":
                # Half precision on GPU to cut VRAM usage.
                pipeline_kwargs["dtype"] = torch.float16

            recognizer = pipeline(
                "token-classification",
                model="dslim/bert-base-NER",
                **pipeline_kwargs,
            )
            cls._entity_recognizers[device_str] = recognizer

        return recognizer
|
||||
|
||||
def _encode_with_backoff(
|
||||
self, texts: list[str], initial_batch_size: int
|
||||
@@ -129,6 +154,26 @@ class NLP:
|
||||
continue
|
||||
raise
|
||||
|
||||
    def _infer_entities_with_backoff(
        self, texts: list[str], initial_batch_size: int
    ) -> list[list[dict[str, Any]]]:
        """Run the NER pipeline over *texts*, halving batch size on CUDA OOM.

        Retries until the call succeeds or the batch size reaches the floor
        of 4; any RuntimeError that is not a CUDA out-of-memory error is
        re-raised immediately.
        """
        batch_size = initial_batch_size

        while True:
            try:
                return self.entity_recognizer(texts, batch_size=batch_size)
            except RuntimeError as exc:
                if (
                    self.use_cuda
                    and "out of memory" in str(exc).lower()
                    and batch_size > 4
                ):
                    batch_size = max(4, batch_size // 2)
                    # Release cached allocator blocks before retrying.
                    torch.cuda.empty_cache()
                    continue
                raise
|
||||
|
||||
def add_emotion_cols(self) -> None:
|
||||
texts = self.df[self.content_col].astype(str).str.slice(0, 512).tolist()
|
||||
|
||||
@@ -183,3 +228,51 @@ class NLP:
|
||||
self.df.loc[self.df["topic_confidence"] < confidence_threshold, "topic"] = (
|
||||
"Misc"
|
||||
)
|
||||
|
||||
    def add_ner_cols(self, max_chars: int = 512) -> None:
        """Add ``entities``, ``entity_counts`` and per-label ``entity_*`` columns.

        Texts are truncated to *max_chars* chars before inference.
        ``entities`` holds a list of ``{"text", "label"}`` dicts per row,
        ``entity_counts`` a label->count dict, and every label observed in
        the dataset also becomes an integer ``entity_<LABEL>`` column.
        """
        texts = (
            self.df[self.content_col]
            .fillna("")
            .astype(str)
            .str.slice(0, max_chars)
            .tolist()
        )

        if not texts:
            # Empty dataset: still create the columns so downstream code
            # can rely on their presence.
            self.df["entities"] = []
            self.df["entity_counts"] = []
            return

        # Larger batches on GPU, modest ones on CPU.
        results = self._infer_entities_with_backoff(texts, 32 if self.use_cuda else 8)

        entity_lists = []
        entity_count_dicts = []

        for row in results:
            entities = []
            counts = {}

            for ent in row:
                word = ent.get("word")
                label = ent.get("entity_group")

                # Defensive: skip malformed pipeline outputs.
                if isinstance(word, str) and isinstance(label, str):
                    entities.append({"text": word, "label": label})
                    counts[label] = counts.get(label, 0) + 1

            entity_lists.append(entities)
            entity_count_dicts.append(counts)

        self.df["entities"] = entity_lists
        self.df["entity_counts"] = entity_count_dicts

        # Expand label counts into columns
        all_labels = set()
        for d in entity_count_dicts:
            all_labels.update(d.keys())

        for label in all_labels:
            col_name = f"entity_{label}"
            self.df[col_name] = [
                d.get(label, 0) for d in entity_count_dicts
            ]
|
||||
70
server/analysis/temporal.py
Normal file
70
server/analysis/temporal.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import pandas as pd
|
||||
|
||||
class TemporalAnalysis:
    """Time-based aggregations over a posts/comments DataFrame.

    Expects columns: ``id``, ``type``, ``reply_to``, ``dt`` (datetime),
    ``date``, ``weekday``, ``hour``, plus ``emotion_*`` score columns.
    """

    def __init__(self, df: pd.DataFrame):
        self.df = df

    def avg_reply_time_per_emotion(self) -> list[dict]:
        """Mean reply latency (seconds) and count, grouped by the reply's
        dominant emotion (max-scoring ``emotion_*`` column, excluding
        neutral and surprise)."""
        df = self.df.copy()

        # BUG FIX: added .copy() — the boolean-mask slice is a view of `df`;
        # assigning new columns to it raises SettingWithCopyWarning and the
        # writes are not guaranteed to persist.
        replies = df[
            (df["type"] == "comment") &
            (df["reply_to"].notna()) &
            (df["reply_to"] != "")
        ].copy()

        id_to_time = df.set_index("id")["dt"].to_dict()

        def compute_reply_time(row):
            # Seconds since the parent event; None when the parent id is
            # not present in the dataset.
            parent_time = id_to_time.get(row["reply_to"])
            if parent_time is None:
                return None
            return (row["dt"] - parent_time).total_seconds()

        replies["reply_time"] = replies.apply(compute_reply_time, axis=1)

        emotion_cols = [
            col for col in df.columns
            if col.startswith("emotion_") and col not in ("emotion_neutral", "emotion_surprise")
        ]
        replies["dominant_emotion"] = replies[emotion_cols].idxmax(axis=1)

        grouped = (
            replies
            .groupby("dominant_emotion")["reply_time"]
            .agg(["mean", "count"])
            .reset_index()
        )

        return grouped.to_dict(orient="records")

    def posts_per_day(self) -> list[dict]:
        """Event counts per calendar day as ``[{"date", "count"}, ...]``."""
        per_day = (
            self.df.groupby("date")
            .size()
            .reset_index(name="count")
        )

        return per_day.to_dict(orient="records")

    def heatmap(self) -> list[dict]:
        """Event counts per (weekday, hour): one record per observed weekday
        with string keys "0".."23".

        NOTE: converts ``self.df["weekday"]`` to an ordered Categorical in
        place so weekdays sort Monday-first rather than alphabetically.
        """
        weekday_order = [
            "Monday", "Tuesday", "Wednesday",
            "Thursday", "Friday", "Saturday", "Sunday"
        ]

        self.df["weekday"] = pd.Categorical(
            self.df["weekday"],
            categories=weekday_order,
            ordered=True
        )

        heatmap = (
            self.df
            .groupby(["weekday", "hour"], observed=True)
            .size()
            .unstack(fill_value=0)
            .reindex(columns=range(24), fill_value=0)
        )

        # String column keys so the records are JSON-friendly.
        heatmap.columns = heatmap.columns.map(str)
        return heatmap.to_dict(orient="records")
|
||||
@@ -12,7 +12,7 @@ app = Flask(__name__)
|
||||
CORS(app, resources={r"/*": {"origins": "http://localhost:5173"}})
|
||||
|
||||
# Global State
|
||||
posts_df = pd.read_json('posts.jsonl', lines=True)
|
||||
posts_df = pd.read_json('small.jsonl', lines=True)
|
||||
with open("topic_buckets.json", "r", encoding="utf-8") as f:
|
||||
domain_topics = json.load(f)
|
||||
stat_obj = StatGen(posts_df, domain_topics)
|
||||
@@ -47,7 +47,7 @@ def get_dataset():
|
||||
if stat_obj is None:
|
||||
return jsonify({"error": "No data uploaded"}), 400
|
||||
|
||||
return jsonify(stat_obj.df.to_dict(orient="records")), 200
|
||||
return stat_obj.df.to_json(orient="records"), 200, {"Content-Type": "application/json"}
|
||||
|
||||
@app.route('/stats/content', methods=['GET'])
|
||||
def word_frequencies():
|
||||
@@ -55,7 +55,7 @@ def word_frequencies():
|
||||
return jsonify({"error": "No data uploaded"}), 400
|
||||
|
||||
try:
|
||||
return jsonify(stat_obj.content_analysis()), 200
|
||||
return jsonify(stat_obj.get_content_analysis()), 200
|
||||
except ValueError as e:
|
||||
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||
except Exception as e:
|
||||
@@ -80,7 +80,7 @@ def get_time_analysis():
|
||||
return jsonify({"error": "No data uploaded"}), 400
|
||||
|
||||
try:
|
||||
return jsonify(stat_obj.time_analysis()), 200
|
||||
return jsonify(stat_obj.get_time_analysis()), 200
|
||||
except ValueError as e:
|
||||
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||
except Exception as e:
|
||||
@@ -93,13 +93,39 @@ def get_user_analysis():
|
||||
return jsonify({"error": "No data uploaded"}), 400
|
||||
|
||||
try:
|
||||
return jsonify(stat_obj.user_analysis()), 200
|
||||
return jsonify(stat_obj.get_user_analysis()), 200
|
||||
except ValueError as e:
|
||||
return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
|
||||
except Exception as e:
|
||||
print(traceback.format_exc())
|
||||
return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
||||
|
||||
@app.route("/stats/cultural", methods=["GET"])
def get_cultural_analysis():
    """Return cultural/identity-marker stats for the loaded dataset.

    400 when no dataset is loaded or its columns are malformed; 500 (with
    a logged traceback) on any unexpected failure.
    """
    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400

    try:
        return jsonify(stat_obj.get_cultural_analysis()), 200
    except ValueError as e:
        # Expected failure mode: dataset missing required columns.
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
|
||||
|
||||
@app.route("/stats/interaction", methods=["GET"])
def get_interaction_analysis():
    """Return thread/interaction structure stats for the loaded dataset.

    400 when no dataset is loaded or its columns are malformed; 500 (with
    a logged traceback) on any unexpected failure.
    """
    if stat_obj is None:
        return jsonify({"error": "No data uploaded"}), 400

    try:
        return jsonify(stat_obj.get_interactional_analysis()), 200
    except ValueError as e:
        # Expected failure mode: dataset missing required columns.
        return jsonify({"error": f"Malformed or missing data: {str(e)}"}), 400
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500
||||
|
||||
@app.route('/filter/search', methods=["POST"])
|
||||
def search_dataset():
|
||||
if stat_obj is None:
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
import pandas as pd
|
||||
import re
|
||||
import nltk
|
||||
import datetime
|
||||
import nltk
|
||||
|
||||
from nltk.corpus import stopwords
|
||||
from collections import Counter
|
||||
from server.nlp import NLP
|
||||
from server.analysis.nlp import NLP
|
||||
from server.analysis.temporal import TemporalAnalysis
|
||||
from server.analysis.emotional import EmotionalAnalysis
|
||||
from server.analysis.interactional import InteractionAnalysis
|
||||
from server.analysis.linguistic import LinguisticAnalysis
|
||||
|
||||
DOMAIN_STOPWORDS = {
|
||||
"www", "https", "http",
|
||||
@@ -23,6 +25,7 @@ EXCLUDE_WORDS = set(stopwords.words('english')) | DOMAIN_STOPWORDS
|
||||
class StatGen:
|
||||
def __init__(self, df: pd.DataFrame, domain_topics: dict) -> None:
|
||||
comments_df = df[["id", "comments"]].explode("comments")
|
||||
comments_df = comments_df[comments_df["comments"].apply(lambda x: isinstance(x, dict))]
|
||||
comments_df = pd.json_normalize(comments_df["comments"])
|
||||
|
||||
posts_df = df.drop(columns=["comments"])
|
||||
@@ -35,9 +38,15 @@ class StatGen:
|
||||
|
||||
self.df = pd.concat([posts_df, comments_df])
|
||||
self.df.drop(columns=["post_id"], inplace=True, errors="ignore")
|
||||
|
||||
self.nlp = NLP(self.df, "title", "content", domain_topics)
|
||||
self._add_extra_cols(self.df)
|
||||
|
||||
self.temporal_analysis = TemporalAnalysis(self.df)
|
||||
self.emotional_analysis = EmotionalAnalysis(self.df)
|
||||
self.interaction_analysis = InteractionAnalysis(self.df, EXCLUDE_WORDS)
|
||||
self.linguistic_analysis = LinguisticAnalysis(self.df, EXCLUDE_WORDS)
|
||||
|
||||
self.original_df = self.df.copy(deep=True)
|
||||
|
||||
## Private Methods
|
||||
@@ -50,141 +59,52 @@ class StatGen:
|
||||
|
||||
self.nlp.add_emotion_cols()
|
||||
self.nlp.add_topic_col()
|
||||
|
||||
def _tokenize(self, text: str):
|
||||
tokens = re.findall(r"\b[a-z]{3,}\b", text)
|
||||
return [t for t in tokens if t not in EXCLUDE_WORDS]
|
||||
|
||||
    def _vocab_richness_per_user(self, min_words: int = 20, top_most_used_words: int = 100) -> list:
        """Per-author vocabulary stats, sorted by vocab richness (descending).

        Authors with fewer than *min_words* total (non-excluded) tokens are
        skipped. Each entry carries event/word counts, a unique/total word
        ratio, and the author's *top_most_used_words* most frequent words.
        """
        df = self.df.copy()
        df["content"] = df["content"].fillna("").astype(str).str.lower()
        df["tokens"] = df["content"].apply(self._tokenize)

        rows = []
        for author, group in df.groupby("author"):
            # Flatten every post's token list into one list for this author.
            all_tokens = [t for tokens in group["tokens"] for t in tokens]

            total_words = len(all_tokens)
            unique_words = len(set(all_tokens))
            events = len(group)

            # Min amount of words for a user, any less than this might give weird results
            if total_words < min_words:
                continue

            # 100% = they never reused a word (excluding stop words)
            vocab_richness = unique_words / total_words
            avg_words = total_words / max(events, 1)

            counts = Counter(all_tokens)
            top_words = [
                {"word": w, "count": int(c)}
                for w, c in counts.most_common(top_most_used_words)
            ]

            rows.append({
                "author": author,
                "events": int(events),
                "total_words": int(total_words),
                "unique_words": int(unique_words),
                "vocab_richness": round(vocab_richness, 3),
                "avg_words_per_event": round(avg_words, 2),
                "top_words": top_words
            })

        rows = sorted(rows, key=lambda x: x["vocab_richness"], reverse=True)

        return rows
|
||||
self.nlp.add_ner_cols()
|
||||
|
||||
    def _interaction_graph(self):
        """Directed reply graph: ``{author: {replied_to_author: count}}``.

        Every non-null author appears as a key even with no outgoing edges.
        Self-replies and replies whose parent id is unknown are ignored.
        """
        interactions = {a: {} for a in self.df["author"].dropna().unique()}

        # reply_to refers to the comment id, this allows us to map comment ids to usernames
        id_to_author = self.df.set_index("id")["author"].to_dict()

        for _, row in self.df.iterrows():
            a = row["author"]
            reply_id = row["reply_to"]

            if pd.isna(a) or pd.isna(reply_id) or reply_id == "":
                continue

            b = id_to_author.get(reply_id)
            if b is None or a == b:
                continue

            interactions[a][b] = interactions[a].get(b, 0) + 1

        return interactions
|
||||
|
||||
    def _avg_reply_time_per_emotion(self):
        """Mean reply latency (seconds) and count, grouped by the reply's
        dominant emotion (max-scoring ``emotion_*`` column, excluding
        neutral and surprise)."""
        df = self.df.copy()

        replies = df[
            (df["type"] == "comment") &
            (df["reply_to"].notna()) &
            (df["reply_to"] != "")
        ]

        id_to_time = df.set_index("id")["dt"].to_dict()

        def compute_reply_time(row):
            # Seconds since the parent event; None when the parent id is
            # not present in the dataset.
            reply_id = row["reply_to"]
            parent_time = id_to_time.get(reply_id)

            if parent_time is None:
                return None

            return (row["dt"] - parent_time).total_seconds()

        # NOTE(review): `replies` is a slice of `df`; assigning columns to it
        # triggers SettingWithCopyWarning — consider `.copy()` in the filter.
        replies["reply_time"] = replies.apply(compute_reply_time, axis=1)
        emotion_cols = [col for col in df.columns if col.startswith("emotion_") and col not in ("emotion_neutral", "emotion_surprise")]
        replies["dominant_emotion"] = replies[emotion_cols].idxmax(axis=1)

        grouped = (
            replies
            .groupby("dominant_emotion")["reply_time"]
            .agg(["mean", "count"])
            .reset_index()
        )

        return grouped.to_dict(orient="records")
|
||||
|
||||
## Public
|
||||
def time_analysis(self) -> pd.DataFrame:
|
||||
per_day = (
|
||||
self.df.groupby("date")
|
||||
.size()
|
||||
.reset_index(name="count")
|
||||
)
|
||||
|
||||
weekday_order = [
|
||||
"Monday", "Tuesday", "Wednesday",
|
||||
"Thursday", "Friday", "Saturday", "Sunday"
|
||||
]
|
||||
|
||||
self.df["weekday"] = pd.Categorical(
|
||||
self.df["weekday"],
|
||||
categories=weekday_order,
|
||||
ordered=True
|
||||
)
|
||||
|
||||
heatmap = (
|
||||
self.df
|
||||
.groupby(["weekday", "hour"], observed=True)
|
||||
.size()
|
||||
.unstack(fill_value=0)
|
||||
.reindex(columns=range(24), fill_value=0)
|
||||
)
|
||||
|
||||
heatmap.columns = heatmap.columns.map(str)
|
||||
|
||||
burst_index = per_day["count"].std() / max(per_day["count"].mean(), 1)
|
||||
|
||||
# topics over time
|
||||
# emotions over time
|
||||
def get_time_analysis(self) -> pd.DataFrame:
|
||||
return {
|
||||
"events_per_day": per_day.to_dict(orient="records"),
|
||||
"weekday_hour_heatmap": heatmap.to_dict(orient="records"),
|
||||
"burstiness": round(burst_index, 2)
|
||||
"events_per_day": self.temporal_analysis.posts_per_day(),
|
||||
"weekday_hour_heatmap": self.temporal_analysis.heatmap()
|
||||
}
|
||||
|
||||
# average topic duration
|
||||
    def get_content_analysis(self) -> dict:
        """Word/phrase frequencies plus emotion aggregates for the dataset.

        Delegates to the linguistic, emotional and temporal analysis helpers.
        """
        return {
            "word_frequencies": self.linguistic_analysis.word_frequencies(),
            "common_two_phrases": self.linguistic_analysis.ngrams(),
            "common_three_phrases": self.linguistic_analysis.ngrams(n=3),
            "average_emotion_by_topic": self.emotional_analysis.avg_emotion_by_topic(),
            "reply_time_by_emotion": self.temporal_analysis.avg_reply_time_per_emotion()
        }
|
||||
|
||||
# average emotion per user
|
||||
# average chain length
|
||||
    def get_user_analysis(self) -> dict:
        """Per-user activity stats and the author interaction graph."""
        return {
            "top_users": self.interaction_analysis.top_users(),
            "users": self.interaction_analysis.per_user_analysis(),
            "interaction_graph": self.interaction_analysis.interaction_graph()
        }
|
||||
|
||||
# average / max thread depth
|
||||
# high engagement threads based on volume
|
||||
|
||||
    def get_interactional_analysis(self) -> dict:
        """Thread-structure stats: depth and thread length by emotion."""
        return {
            "average_thread_depth": self.interaction_analysis.average_thread_depth(),
            "average_thread_length_by_emotion": self.interaction_analysis.average_thread_length_by_emotion()
        }
|
||||
|
||||
# detect community jargon
|
||||
# in-group and out-group linguistic markers
|
||||
    def get_cultural_analysis(self) -> dict:
        """In-group / out-group identity-marker usage stats."""
        return {
            "identity_markers": self.linguistic_analysis.identity_markers()
        }
|
||||
|
||||
def summary(self) -> dict:
|
||||
@@ -206,122 +126,6 @@ class StatGen:
|
||||
},
|
||||
"sources": self.df["source"].dropna().unique().tolist()
|
||||
}
|
||||
|
||||
    def content_analysis(self, limit: int = 100) -> dict:
        """Legacy combined content stats: word frequencies, per-topic emotion
        means (with per-topic event counts), and reply time by emotion.

        *limit* caps the number of word-frequency records returned.
        """
        texts = (
            self.df["content"]
            .dropna()
            .astype(str)
            .str.lower()
        )

        # Word tokens of 3+ letters, minus module-level stop words.
        words = []
        for text in texts:
            tokens = re.findall(r"\b[a-z]{3,}\b", text)
            words.extend(
                w for w in tokens
                if w not in EXCLUDE_WORDS
            )

        counts = Counter(words)

        word_frequencies = (
            pd.DataFrame(counts.items(), columns=["word", "count"])
            .sort_values("count", ascending=False)
            .head(limit)
            .reset_index(drop=True)
        )

        # Excluded from the emotion averages below.
        emotion_exclusions = [
            "emotion_neutral",
            "emotion_surprise"
        ]

        emotion_cols = [
            col for col in self.df.columns
            if col.startswith("emotion_") and col not in emotion_exclusions
        ]

        # Event count per topic. NOTE: rebinds `counts`, shadowing the word
        # Counter above (which is no longer needed at this point).
        counts = (
            self.df[
                (self.df["topic"] != "Misc")
            ]
            .groupby("topic")
            .size()
            .rename("n")
        )

        avg_emotion_by_topic = (
            self.df[
                (self.df["topic"] != "Misc")
            ]
            .groupby("topic")[emotion_cols]
            .mean()
            .reset_index()
        )

        # Attach the per-topic event count as column "n".
        avg_emotion_by_topic = avg_emotion_by_topic.merge(
            counts,
            on="topic"
        )

        return {
            "word_frequencies": word_frequencies.to_dict(orient='records'),
            "average_emotion_by_topic": avg_emotion_by_topic.to_dict(orient='records'),
            "reply_time_by_emotion": self._avg_reply_time_per_emotion()
        }
|
||||
|
||||
    def user_analysis(self) -> dict:
        """Legacy per-user stats: activity counts, comment/post ratios,
        vocabulary richness, and the author interaction graph."""
        counts = (
            self.df.groupby(["author", "source"])
            .size()
            .sort_values(ascending=False)
        )

        top_users = [
            {"author": author, "source": source, "count": int(count)}
            for (author, source), count in counts.items()
        ]

        per_user = (
            self.df.groupby(["author", "type"])
            .size()
            .unstack(fill_value=0)
        )

        # ensure columns always exist
        for col in ("post", "comment"):
            if col not in per_user.columns:
                per_user[col] = 0

        # Zero-post authors are treated as having 1 post to avoid div-by-zero.
        per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(0, 1)
        per_user["comment_share"] = per_user["comment"] / (per_user["post"] + per_user["comment"]).replace(0, 1)
        per_user = per_user.sort_values("comment_post_ratio", ascending=True)
        per_user_records = per_user.reset_index().to_dict(orient="records")

        vocab_rows = self._vocab_richness_per_user()
        vocab_by_author = {row["author"]: row for row in vocab_rows}

        # merge vocab richness + per_user information
        merged_users = []
        for row in per_user_records:
            author = row["author"]
            merged_users.append({
                "author": author,
                "post": int(row.get("post", 0)),
                "comment": int(row.get("comment", 0)),
                "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
                "comment_share": float(row.get("comment_share", 0)),
                # None when the author didn't meet the vocab min-word cutoff.
                "vocab": vocab_by_author.get(author)
            })

        merged_users.sort(key=lambda u: u["comment_post_ratio"])

        return {
            "top_users": top_users,
            "users": merged_users,
            "interaction_graph": self._interaction_graph()
        }
|
||||
|
||||
def search(self, search_query: str) -> dict:
|
||||
self.df = self.df[
|
||||
|
||||
Reference in New Issue
Block a user