Compare commits

...

10 Commits

12 changed files with 827 additions and 255 deletions

View File

@@ -15,6 +15,7 @@
"headlessui": "^0.0.0", "headlessui": "^0.0.0",
"react": "^19.2.0", "react": "^19.2.0",
"react-dom": "^19.2.0", "react-dom": "^19.2.0",
"react-force-graph-3d": "^1.29.1",
"react-router-dom": "^7.13.0", "react-router-dom": "^7.13.0",
"recharts": "^3.7.0" "recharts": "^3.7.0"
}, },
@@ -267,6 +268,15 @@
"@babel/core": "^7.0.0-0" "@babel/core": "^7.0.0-0"
} }
}, },
"node_modules/@babel/runtime": {
"version": "7.28.6",
"resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.6.tgz",
"integrity": "sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==",
"license": "MIT",
"engines": {
"node": ">=6.9.0"
}
},
"node_modules/@babel/template": { "node_modules/@babel/template": {
"version": "7.28.6", "version": "7.28.6",
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz", "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz",
@@ -2022,6 +2032,12 @@
"url": "https://github.com/sponsors/tannerlinsley" "url": "https://github.com/sponsors/tannerlinsley"
} }
}, },
"node_modules/@tweenjs/tween.js": {
"version": "25.0.0",
"resolved": "https://registry.npmjs.org/@tweenjs/tween.js/-/tween.js-25.0.0.tgz",
"integrity": "sha512-XKLA6syeBUaPzx4j3qwMqzzq+V4uo72BnlbOjmuljLrRqdsd3qnzvZZoxvMHZ23ndsRS4aufU6JOZYpCbU6T1A==",
"license": "MIT"
},
"node_modules/@types/babel__core": { "node_modules/@types/babel__core": {
"version": "7.20.5", "version": "7.20.5",
"resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz",
@@ -2488,6 +2504,31 @@
"vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0"
} }
}, },
"node_modules/3d-force-graph": {
"version": "1.79.1",
"resolved": "https://registry.npmjs.org/3d-force-graph/-/3d-force-graph-1.79.1.tgz",
"integrity": "sha512-iscIVt4jWjJ11KEEswgOIOWk8Ew4EFKHRyERJXJ0ouycqzHCtWwb9E5imnxS5rYF1f1IESkFNAfB+h3EkU0Irw==",
"license": "MIT",
"dependencies": {
"accessor-fn": "1",
"kapsule": "^1.16",
"three": ">=0.118 <1",
"three-forcegraph": "1",
"three-render-objects": "^1.35"
},
"engines": {
"node": ">=12"
}
},
"node_modules/accessor-fn": {
"version": "1.5.3",
"resolved": "https://registry.npmjs.org/accessor-fn/-/accessor-fn-1.5.3.tgz",
"integrity": "sha512-rkAofCwe/FvYFUlMB0v0gWmhqtfAtV1IUkdPbfhTUyYniu5LrC0A0UJkTH0Jv3S8SvwkmfuAlY+mQIJATdocMA==",
"license": "MIT",
"engines": {
"node": ">=12"
}
},
"node_modules/acorn": { "node_modules/acorn": {
"version": "8.15.0", "version": "8.15.0",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
@@ -2793,6 +2834,12 @@
"node": ">=12" "node": ">=12"
} }
}, },
"node_modules/d3-binarytree": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/d3-binarytree/-/d3-binarytree-1.0.2.tgz",
"integrity": "sha512-cElUNH+sHu95L04m92pG73t2MEJXKu+GeKUN1TJkFsu93E5W8E9Sc3kHEGJKgenGvj19m6upSn2EunvMgMD2Yw==",
"license": "MIT"
},
"node_modules/d3-cloud": { "node_modules/d3-cloud": {
"version": "1.2.8", "version": "1.2.8",
"resolved": "https://registry.npmjs.org/d3-cloud/-/d3-cloud-1.2.8.tgz", "resolved": "https://registry.npmjs.org/d3-cloud/-/d3-cloud-1.2.8.tgz",
@@ -2826,6 +2873,22 @@
"node": ">=12" "node": ">=12"
} }
}, },
"node_modules/d3-force-3d": {
"version": "3.0.6",
"resolved": "https://registry.npmjs.org/d3-force-3d/-/d3-force-3d-3.0.6.tgz",
"integrity": "sha512-4tsKHUPLOVkyfEffZo1v6sFHvGFwAIIjt/W8IThbp08DYAsXZck+2pSHEG5W1+gQgEvFLdZkYvmJAbRM2EzMnA==",
"license": "MIT",
"dependencies": {
"d3-binarytree": "1",
"d3-dispatch": "1 - 3",
"d3-octree": "1",
"d3-quadtree": "1 - 3",
"d3-timer": "1 - 3"
},
"engines": {
"node": ">=12"
}
},
"node_modules/d3-format": { "node_modules/d3-format": {
"version": "3.1.2", "version": "3.1.2",
"resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz", "resolved": "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz",
@@ -2847,6 +2910,12 @@
"node": ">=12" "node": ">=12"
} }
}, },
"node_modules/d3-octree": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/d3-octree/-/d3-octree-1.1.0.tgz",
"integrity": "sha512-F8gPlqpP+HwRPMO/8uOu5wjH110+6q4cgJvgJT6vlpy3BEaDIKlTZrgHKZSp/i1InRpVfh4puY/kvL6MxK930A==",
"license": "MIT"
},
"node_modules/d3-path": { "node_modules/d3-path": {
"version": "3.1.0", "version": "3.1.0",
"resolved": "https://registry.npmjs.org/d3-path/-/d3-path-3.1.0.tgz", "resolved": "https://registry.npmjs.org/d3-path/-/d3-path-3.1.0.tgz",
@@ -2856,6 +2925,15 @@
"node": ">=12" "node": ">=12"
} }
}, },
"node_modules/d3-quadtree": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/d3-quadtree/-/d3-quadtree-3.0.1.tgz",
"integrity": "sha512-04xDrxQTDTCFwP5H6hRhsRcb9xxv2RzkcsygFzmkSIOJy3PeRJP7sNk3VRIbKXcog561P9oU0/rVH6vDROAgUw==",
"license": "ISC",
"engines": {
"node": ">=12"
}
},
"node_modules/d3-scale": { "node_modules/d3-scale": {
"version": "4.0.2", "version": "4.0.2",
"resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz", "resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz",
@@ -2958,6 +3036,18 @@
"d3-selection": "2 - 3" "d3-selection": "2 - 3"
} }
}, },
"node_modules/data-bind-mapper": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/data-bind-mapper/-/data-bind-mapper-1.0.3.tgz",
"integrity": "sha512-QmU3lyEnbENQPo0M1F9BMu4s6cqNNp8iJA+b/HP2sSb7pf3dxwF3+EP1eO69rwBfH9kFJ1apmzrtogAmVt2/Xw==",
"license": "MIT",
"dependencies": {
"accessor-fn": "1"
},
"engines": {
"node": ">=12"
}
},
"node_modules/debug": { "node_modules/debug": {
"version": "4.4.3", "version": "4.4.3",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
@@ -3419,6 +3509,20 @@
"dev": true, "dev": true,
"license": "ISC" "license": "ISC"
}, },
"node_modules/float-tooltip": {
"version": "1.7.5",
"resolved": "https://registry.npmjs.org/float-tooltip/-/float-tooltip-1.7.5.tgz",
"integrity": "sha512-/kXzuDnnBqyyWyhDMH7+PfP8J/oXiAavGzcRxASOMRHFuReDtofizLLJsf7nnDLAfEaMW4pVWaXrAjtnglpEkg==",
"license": "MIT",
"dependencies": {
"d3-selection": "2 - 3",
"kapsule": "^1.16",
"preact": "10"
},
"engines": {
"node": ">=12"
}
},
"node_modules/follow-redirects": { "node_modules/follow-redirects": {
"version": "1.15.11", "version": "1.15.11",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz",
@@ -3722,11 +3826,19 @@
"dev": true, "dev": true,
"license": "ISC" "license": "ISC"
}, },
"node_modules/jerrypick": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/jerrypick/-/jerrypick-1.1.2.tgz",
"integrity": "sha512-YKnxXEekXKzhpf7CLYA0A+oDP8V0OhICNCr5lv96FvSsDEmrb0GKM776JgQvHTMjr7DTTPEVv/1Ciaw0uEWzBA==",
"license": "MIT",
"engines": {
"node": ">=12"
}
},
"node_modules/js-tokens": { "node_modules/js-tokens": {
"version": "4.0.0", "version": "4.0.0",
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
"integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==",
"dev": true,
"license": "MIT" "license": "MIT"
}, },
"node_modules/js-yaml": { "node_modules/js-yaml": {
@@ -3789,6 +3901,18 @@
"node": ">=6" "node": ">=6"
} }
}, },
"node_modules/kapsule": {
"version": "1.16.3",
"resolved": "https://registry.npmjs.org/kapsule/-/kapsule-1.16.3.tgz",
"integrity": "sha512-4+5mNNf4vZDSwPhKprKwz3330iisPrb08JyMgbsdFrimBCKNHecua/WBwvVg3n7vwx0C1ARjfhwIpbrbd9n5wg==",
"license": "MIT",
"dependencies": {
"lodash-es": "4"
},
"engines": {
"node": ">=12"
}
},
"node_modules/keyv": { "node_modules/keyv": {
"version": "4.5.4", "version": "4.5.4",
"resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz",
@@ -3835,6 +3959,12 @@
"integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==", "integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==",
"license": "MIT" "license": "MIT"
}, },
"node_modules/lodash-es": {
"version": "4.17.23",
"resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.23.tgz",
"integrity": "sha512-kVI48u3PZr38HdYz98UmfPnXl2DXrpdctLrFLCd3kOx1xUkOmpFPx7gCWWM5MPkL/fD8zb+Ph0QzjGFs4+hHWg==",
"license": "MIT"
},
"node_modules/lodash.debounce": { "node_modules/lodash.debounce": {
"version": "4.0.8", "version": "4.0.8",
"resolved": "https://registry.npmjs.org/lodash.debounce/-/lodash.debounce-4.0.8.tgz", "resolved": "https://registry.npmjs.org/lodash.debounce/-/lodash.debounce-4.0.8.tgz",
@@ -3848,6 +3978,18 @@
"dev": true, "dev": true,
"license": "MIT" "license": "MIT"
}, },
"node_modules/loose-envify": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
"integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==",
"license": "MIT",
"dependencies": {
"js-tokens": "^3.0.0 || ^4.0.0"
},
"bin": {
"loose-envify": "cli.js"
}
},
"node_modules/lru-cache": { "node_modules/lru-cache": {
"version": "5.1.1", "version": "5.1.1",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz",
@@ -3934,6 +4076,44 @@
"dev": true, "dev": true,
"license": "MIT" "license": "MIT"
}, },
"node_modules/ngraph.events": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/ngraph.events/-/ngraph.events-1.4.0.tgz",
"integrity": "sha512-NeDGI4DSyjBNBRtA86222JoYietsmCXbs8CEB0dZ51Xeh4lhVl1y3wpWLumczvnha8sFQIW4E0vvVWwgmX2mGw==",
"license": "BSD-3-Clause"
},
"node_modules/ngraph.forcelayout": {
"version": "3.3.1",
"resolved": "https://registry.npmjs.org/ngraph.forcelayout/-/ngraph.forcelayout-3.3.1.tgz",
"integrity": "sha512-MKBuEh1wujyQHFTW57y5vd/uuEOK0XfXYxm3lC7kktjJLRdt/KEKEknyOlc6tjXflqBKEuYBBcu7Ax5VY+S6aw==",
"license": "BSD-3-Clause",
"dependencies": {
"ngraph.events": "^1.0.0",
"ngraph.merge": "^1.0.0",
"ngraph.random": "^1.0.0"
}
},
"node_modules/ngraph.graph": {
"version": "20.1.2",
"resolved": "https://registry.npmjs.org/ngraph.graph/-/ngraph.graph-20.1.2.tgz",
"integrity": "sha512-W/G3GBR3Y5UxMLHTUCPP9v+pbtpzwuAEIqP5oZV+9IwgxAIEZwh+Foc60iPc1idlnK7Zxu0p3puxAyNmDvBd0Q==",
"license": "BSD-3-Clause",
"dependencies": {
"ngraph.events": "^1.4.0"
}
},
"node_modules/ngraph.merge": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/ngraph.merge/-/ngraph.merge-1.0.0.tgz",
"integrity": "sha512-5J8YjGITUJeapsomtTALYsw7rFveYkM+lBj3QiYZ79EymQcuri65Nw3knQtFxQBU1r5iOaVRXrSwMENUPK62Vg==",
"license": "MIT"
},
"node_modules/ngraph.random": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/ngraph.random/-/ngraph.random-1.2.0.tgz",
"integrity": "sha512-4EUeAGbB2HWX9njd6bP6tciN6ByJfoaAvmVL9QTaZSeXrW46eNGA9GajiXiPBbvFqxUWFkEbyo6x5qsACUuVfA==",
"license": "BSD-3-Clause"
},
"node_modules/node-releases": { "node_modules/node-releases": {
"version": "2.0.27", "version": "2.0.27",
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.27.tgz", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.27.tgz",
@@ -3941,6 +4121,15 @@
"dev": true, "dev": true,
"license": "MIT" "license": "MIT"
}, },
"node_modules/object-assign": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
"integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/optionator": { "node_modules/optionator": {
"version": "0.9.4", "version": "0.9.4",
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
@@ -4044,6 +4233,18 @@
"url": "https://github.com/sponsors/jonschlinkert" "url": "https://github.com/sponsors/jonschlinkert"
} }
}, },
"node_modules/polished": {
"version": "4.3.1",
"resolved": "https://registry.npmjs.org/polished/-/polished-4.3.1.tgz",
"integrity": "sha512-OBatVyC/N7SCW/FaDHrSd+vn0o5cS855TOmYi4OkdWUMSJCET/xip//ch8xGUvtr3i44X9LVyWwQlRMTN3pwSA==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.17.8"
},
"engines": {
"node": ">=10"
}
},
"node_modules/postcss": { "node_modules/postcss": {
"version": "8.5.6", "version": "8.5.6",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz",
@@ -4073,6 +4274,16 @@
"node": "^10 || ^12 || >=14" "node": "^10 || ^12 || >=14"
} }
}, },
"node_modules/preact": {
"version": "10.28.3",
"resolved": "https://registry.npmjs.org/preact/-/preact-10.28.3.tgz",
"integrity": "sha512-tCmoRkPQLpBeWzpmbhryairGnhW9tKV6c6gr/w+RhoRoKEJwsjzipwp//1oCpGPOchvSLaAPlpcJi9MwMmoPyA==",
"license": "MIT",
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/preact"
}
},
"node_modules/prelude-ls": { "node_modules/prelude-ls": {
"version": "1.2.1", "version": "1.2.1",
"resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz",
@@ -4083,6 +4294,23 @@
"node": ">= 0.8.0" "node": ">= 0.8.0"
} }
}, },
"node_modules/prop-types": {
"version": "15.8.1",
"resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz",
"integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==",
"license": "MIT",
"dependencies": {
"loose-envify": "^1.4.0",
"object-assign": "^4.1.1",
"react-is": "^16.13.1"
}
},
"node_modules/prop-types/node_modules/react-is": {
"version": "16.13.1",
"resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
"integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==",
"license": "MIT"
},
"node_modules/proxy-from-env": { "node_modules/proxy-from-env": {
"version": "1.1.0", "version": "1.1.0",
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
@@ -4120,6 +4348,23 @@
"react": "^19.2.4" "react": "^19.2.4"
} }
}, },
"node_modules/react-force-graph-3d": {
"version": "1.29.1",
"resolved": "https://registry.npmjs.org/react-force-graph-3d/-/react-force-graph-3d-1.29.1.tgz",
"integrity": "sha512-5Vp+PGpYnO+zLwgK2NvNqdXHvsWLrFzpDfJW1vUA1twjo9SPvXqfUYQrnRmAbD+K2tOxkZw1BkbH31l5b4TWHg==",
"license": "MIT",
"dependencies": {
"3d-force-graph": "^1.79",
"prop-types": "15",
"react-kapsule": "^2.5"
},
"engines": {
"node": ">=12"
},
"peerDependencies": {
"react": "*"
}
},
"node_modules/react-is": { "node_modules/react-is": {
"version": "19.2.4", "version": "19.2.4",
"resolved": "https://registry.npmjs.org/react-is/-/react-is-19.2.4.tgz", "resolved": "https://registry.npmjs.org/react-is/-/react-is-19.2.4.tgz",
@@ -4127,6 +4372,21 @@
"license": "MIT", "license": "MIT",
"peer": true "peer": true
}, },
"node_modules/react-kapsule": {
"version": "2.5.7",
"resolved": "https://registry.npmjs.org/react-kapsule/-/react-kapsule-2.5.7.tgz",
"integrity": "sha512-kifAF4ZPD77qZKc4CKLmozq6GY1sBzPEJTIJb0wWFK6HsePJatK3jXplZn2eeAt3x67CDozgi7/rO8fNQ/AL7A==",
"license": "MIT",
"dependencies": {
"jerrypick": "^1.1.1"
},
"engines": {
"node": ">=12"
},
"peerDependencies": {
"react": ">=16.13.1"
}
},
"node_modules/react-redux": { "node_modules/react-redux": {
"version": "9.2.0", "version": "9.2.0",
"resolved": "https://registry.npmjs.org/react-redux/-/react-redux-9.2.0.tgz", "resolved": "https://registry.npmjs.org/react-redux/-/react-redux-9.2.0.tgz",
@@ -4413,12 +4673,67 @@
"integrity": "sha512-05PUHKSNE8ou2dwIxTngl4EzcnsCDZGJ/iCLtDflR/SHB/ny14rXc+qU5P4mG9JkusiV7EivzY9Mhm55AzAvCg==", "integrity": "sha512-05PUHKSNE8ou2dwIxTngl4EzcnsCDZGJ/iCLtDflR/SHB/ny14rXc+qU5P4mG9JkusiV7EivzY9Mhm55AzAvCg==",
"license": "MIT" "license": "MIT"
}, },
"node_modules/three": {
"version": "0.182.0",
"resolved": "https://registry.npmjs.org/three/-/three-0.182.0.tgz",
"integrity": "sha512-GbHabT+Irv+ihI1/f5kIIsZ+Ef9Sl5A1Y7imvS5RQjWgtTPfPnZ43JmlYI7NtCRDK9zir20lQpfg8/9Yd02OvQ==",
"license": "MIT"
},
"node_modules/three-forcegraph": {
"version": "1.43.1",
"resolved": "https://registry.npmjs.org/three-forcegraph/-/three-forcegraph-1.43.1.tgz",
"integrity": "sha512-lQnYPLvR31gb91mF5xHhU0jPHJgBPw9QB23R6poCk8Tgvz8sQtq7wTxwClcPdfKCBbHXsb7FSqK06Osiu1kQ5A==",
"license": "MIT",
"dependencies": {
"accessor-fn": "1",
"d3-array": "1 - 3",
"d3-force-3d": "2 - 3",
"d3-scale": "1 - 4",
"d3-scale-chromatic": "1 - 3",
"data-bind-mapper": "1",
"kapsule": "^1.16",
"ngraph.forcelayout": "3",
"ngraph.graph": "20",
"tinycolor2": "1"
},
"engines": {
"node": ">=12"
},
"peerDependencies": {
"three": ">=0.118.3"
}
},
"node_modules/three-render-objects": {
"version": "1.40.4",
"resolved": "https://registry.npmjs.org/three-render-objects/-/three-render-objects-1.40.4.tgz",
"integrity": "sha512-Ukpu1pei3L5r809izvjsZxwuRcYLiyn6Uvy3lZ9bpMTdvj3i6PeX6w++/hs2ZS3KnEzGjb6YvTvh4UQuwHTDJg==",
"license": "MIT",
"dependencies": {
"@tweenjs/tween.js": "18 - 25",
"accessor-fn": "1",
"float-tooltip": "^1.7",
"kapsule": "^1.16",
"polished": "4"
},
"engines": {
"node": ">=12"
},
"peerDependencies": {
"three": ">=0.168"
}
},
"node_modules/tiny-invariant": { "node_modules/tiny-invariant": {
"version": "1.3.3", "version": "1.3.3",
"resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz", "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz",
"integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==", "integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==",
"license": "MIT" "license": "MIT"
}, },
"node_modules/tinycolor2": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/tinycolor2/-/tinycolor2-1.6.0.tgz",
"integrity": "sha512-XPaBkWQJdsf3pLKJV9p4qN/S+fm2Oj8AIPo1BTUhg5oxkvm9+SVEGFdhyOz7tTdUTfvxMiAs4sp6/eZO2Ew+pw==",
"license": "MIT"
},
"node_modules/tinyglobby": { "node_modules/tinyglobby": {
"version": "0.2.15", "version": "0.2.15",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",

View File

@@ -17,6 +17,7 @@
"headlessui": "^0.0.0", "headlessui": "^0.0.0",
"react": "^19.2.0", "react": "^19.2.0",
"react-dom": "^19.2.0", "react-dom": "^19.2.0",
"react-force-graph-3d": "^1.29.1",
"react-router-dom": "^7.13.0", "react-router-dom": "^7.13.0",
"recharts": "^3.7.0" "recharts": "^3.7.0"
}, },

View File

@@ -0,0 +1,61 @@
import { useMemo } from "react";
import ForceGraph3D from "react-force-graph-3d";
import {
  type UserAnalysisResponse,
  type InteractionGraph
} from '../types/ApiTypes';
import StatsStyling from "../styles/stats_styling";
const styles = StatsStyling;
/**
 * Convert the API's adjacency map (`source -> target -> count`) into the
 * `{ nodes, links }` structure passed to ForceGraph3D.
 *
 * Links are dropped when their count is below `minInteractions` or when either
 * endpoint is the "[deleted]" placeholder. Only users that still take part in
 * at least one remaining link are returned as nodes.
 *
 * @param apiData         Interaction counts keyed by source user, then target user.
 * @param minInteractions Minimum count a link needs to be kept (default 2).
 * @returns `nodes` as `{ id }` objects and `links` as `{ source, target, value }`.
 */
function ApiToGraphData(apiData: InteractionGraph, minInteractions: number = 2) {
  const DELETED_USER = "[deleted]";

  const links: { source: string; target: string; value: number }[] = [];
  for (const [source, targets] of Object.entries(apiData)) {
    if (source === DELETED_USER) {
      continue;
    }
    for (const [target, value] of Object.entries(targets)) {
      // drop low-value and deleted interactions to reduce clutter
      if (target !== DELETED_USER && value >= minInteractions) {
        links.push({ source, target, value });
      }
    }
  }

  const connectedIds = new Set(links.flatMap(link => [link.source, link.target]));

  // Users that appear as top-level keys keep their original relative order.
  const keyedIds = Object.keys(apiData).filter(id => connectedIds.has(id));
  const keyedIdSet = new Set(keyedIds);

  // A target that never appears as a top-level key must still get a node,
  // otherwise the link would reference an id that is absent from `nodes`.
  const targetOnlyIds = Array.from(connectedIds).filter(id => !keyedIdSet.has(id));

  const nodes = [...keyedIds, ...targetOnlyIds].map(id => ({ id }));
  return { nodes, links };
}
/**
 * "Interaction" tab of the stats page: renders the user-to-user reply graph
 * from `props.data.interaction_graph` as a 3D force-directed graph.
 */
const InteractionStats = (props: { data: UserAnalysisResponse }) => {
  const interactionGraph = props.data.interaction_graph;

  // Memoised so that an unrelated re-render of the parent does not hand the
  // graph component a brand-new graphData object; it is rebuilt only when the
  // API payload itself changes.
  const graphData = useMemo(
    () => ApiToGraphData(interactionGraph),
    [interactionGraph]
  );

  return (
    <div style={styles.page}>
      <h2 style={styles.sectionTitle}>User Interaction Graph</h2>
      <p style={styles.sectionSubtitle}>
        This graph visualizes interactions between users based on comments and replies.
        Nodes represent users, and edges represent interactions (e.g., comments or replies) between them.
      </p>
      <div style={{ height: "600px", border: "1px solid #ccc", borderRadius: 8, marginTop: 16 }}>
        <ForceGraph3D
          graphData={graphData}
          nodeAutoColorBy="id"
          linkDirectionalParticles={2}
          linkDirectionalParticleSpeed={0.005}
          linkWidth={(link) => Math.sqrt(link.value)}
          nodeLabel={(node) => `${node.id}`}
        />
      </div>
    </div>
  );
}
export default InteractionStats;

View File

@@ -3,6 +3,7 @@ import axios from "axios";
import StatsStyling from "../styles/stats_styling"; import StatsStyling from "../styles/stats_styling";
import SummaryStats from "../components/SummaryStats"; import SummaryStats from "../components/SummaryStats";
import EmotionalStats from "../components/EmotionalStats"; import EmotionalStats from "../components/EmotionalStats";
import InteractionStats from "../components/InteractionStats";
import { import {
type SummaryResponse, type SummaryResponse,
@@ -16,7 +17,7 @@ const styles = StatsStyling;
const StatPage = () => { const StatPage = () => {
const [error, setError] = useState(''); const [error, setError] = useState('');
const [loading, setLoading] = useState(false); const [loading, setLoading] = useState(false);
const [activeView, setActiveView] = useState<"summary" | "emotional">("summary"); const [activeView, setActiveView] = useState<"summary" | "emotional" | "interaction">("summary");
const [userData, setUserData] = useState<UserAnalysisResponse | null>(null); const [userData, setUserData] = useState<UserAnalysisResponse | null>(null);
const [timeData, setTimeData] = useState<TimeAnalysisResponse | null>(null); const [timeData, setTimeData] = useState<TimeAnalysisResponse | null>(null);
@@ -133,6 +134,13 @@ return (
> >
Emotional Emotional
</button> </button>
<button
onClick={() => setActiveView("interaction")}
style={activeView === "interaction" ? styles.buttonPrimary : styles.buttonSecondary}
>
Interaction
</button>
</div> </div>
{activeView === "summary" && ( {activeView === "summary" && (
@@ -154,6 +162,10 @@ return (
</div> </div>
)} )}
{activeView === "interaction" && userData && (
<InteractionStats data={userData} />
)}
</div> </div>
); );
} }

View File

@@ -35,9 +35,12 @@ type User = {
vocab?: Vocab | null; vocab?: Vocab | null;
}; };
type InteractionGraph = Record<string, Record<string, number>>;
type UserAnalysisResponse = { type UserAnalysisResponse = {
top_users: TopUser[]; top_users: TopUser[];
users: User[]; users: User[];
interaction_graph: InteractionGraph;
}; };
// Time Analysis // Time Analysis
@@ -89,6 +92,7 @@ export type {
TopUser, TopUser,
Vocab, Vocab,
User, User,
InteractionGraph,
UserAnalysisResponse, UserAnalysisResponse,
FrequencyWord, FrequencyWord,
AverageEmotionByTopic, AverageEmotionByTopic,

View File

@@ -0,0 +1,41 @@
import pandas as pd
class EmotionalAnalysis:
    """Aggregations over the ``emotion_*`` score columns of a DataFrame."""

    # Emotion columns that are left out of the per-topic averages.
    _EXCLUDED_EMOTIONS = frozenset({"emotion_neutral", "emotion_surprise"})

    def __init__(self, df: pd.DataFrame):
        self.df = df

    def avg_emotion_by_topic(self) -> list[dict]:
        """Average every included emotion column per topic.

        Rows whose ``topic`` is ``"Misc"`` are ignored.

        Returns:
            One record per topic containing ``topic``, the mean of each
            ``emotion_*`` column (excluding neutral and surprise) and ``n``,
            the number of rows that went into that topic.
        """
        emotion_cols = [
            col for col in self.df.columns
            if col.startswith("emotion_") and col not in self._EXCLUDED_EMOTIONS
        ]

        # Filter and group once; both the counts and the means use the same rows.
        by_topic = self.df[self.df["topic"] != "Misc"].groupby("topic")

        counts = by_topic.size().reset_index(name="n")
        averages = by_topic[emotion_cols].mean().reset_index()

        return averages.merge(counts, on="topic").to_dict(orient="records")

View File

@@ -0,0 +1,126 @@
import pandas as pd
import re
from collections import Counter
class InteractionAnalysis:
    """Per-author activity, vocabulary and reply-interaction statistics.

    The methods read the columns ``author``, ``content``, ``source``, ``type``,
    ``id`` and ``reply_to`` from the wrapped DataFrame.
    """

    def __init__(self, df: pd.DataFrame, word_exclusions: set[str]):
        self.df = df
        self.word_exclusions = word_exclusions

    def _tokenize(self, text: str):
        """Return the lowercase a-z words of 3+ letters in ``text`` that are not excluded.

        The pattern only matches lowercase letters, so callers lower-case first.
        """
        tokens = re.findall(r"\b[a-z]{3,}\b", text)
        return [t for t in tokens if t not in self.word_exclusions]

    def _vocab_richness_per_user(self, min_words: int = 20, top_most_used_words: int = 100) -> list:
        """Compute vocabulary statistics for every author with enough text.

        Args:
            min_words: Authors with fewer tokens than this are skipped.
            top_most_used_words: How many of the author's most frequent words to report.

        Returns:
            A list of dicts sorted by ``vocab_richness`` in descending order.
        """
        df = self.df.copy()
        df["content"] = df["content"].fillna("").astype(str).str.lower()
        df["tokens"] = df["content"].apply(self._tokenize)

        rows = []
        for author, group in df.groupby("author"):
            all_tokens = [t for tokens in group["tokens"] for t in tokens]

            total_words = len(all_tokens)
            unique_words = len(set(all_tokens))
            events = len(group)

            # Min amount of words for a user, any less than this might give weird results
            if total_words < min_words:
                continue

            # 100% = they never reused a word (excluding stop words)
            vocab_richness = unique_words / total_words
            avg_words = total_words / max(events, 1)

            counts = Counter(all_tokens)
            top_words = [
                {"word": w, "count": int(c)}
                for w, c in counts.most_common(top_most_used_words)
            ]

            rows.append({
                "author": author,
                "events": int(events),
                "total_words": int(total_words),
                "unique_words": int(unique_words),
                "vocab_richness": round(vocab_richness, 3),
                "avg_words_per_event": round(avg_words, 2),
                "top_words": top_words
            })

        rows = sorted(rows, key=lambda x: x["vocab_richness"], reverse=True)
        return rows

    def top_users(self) -> list:
        """Return ``{author, source, count}`` records, most active first."""
        counts = (
            self.df.groupby(["author", "source"])
            .size()
            .sort_values(ascending=False)
        )

        top_users = [
            {"author": author, "source": source, "count": int(count)}
            for (author, source), count in counts.items()
        ]
        return top_users

    def per_user_analysis(self) -> list:
        """Return post/comment counts, ratios and vocabulary stats per author.

        Returns:
            A list of dicts sorted by ``comment_post_ratio`` ascending. ``vocab``
            is ``None`` for authors that ``_vocab_richness_per_user`` skipped.
        """
        per_user = (
            self.df.groupby(["author", "type"])
            .size()
            .unstack(fill_value=0)
        )

        # ensure columns always exist
        for col in ("post", "comment"):
            if col not in per_user.columns:
                per_user[col] = 0

        # A zero denominator is replaced by 1 to avoid dividing by zero.
        per_user["comment_post_ratio"] = per_user["comment"] / per_user["post"].replace(0, 1)
        per_user["comment_share"] = per_user["comment"] / (per_user["post"] + per_user["comment"]).replace(0, 1)
        per_user = per_user.sort_values("comment_post_ratio", ascending=True)
        per_user_records = per_user.reset_index().to_dict(orient="records")

        vocab_rows = self._vocab_richness_per_user()
        vocab_by_author = {row["author"]: row for row in vocab_rows}

        # merge vocab richness + per_user information
        merged_users = []
        for row in per_user_records:
            author = row["author"]
            merged_users.append({
                "author": author,
                "post": int(row.get("post", 0)),
                "comment": int(row.get("comment", 0)),
                "comment_post_ratio": float(row.get("comment_post_ratio", 0)),
                "comment_share": float(row.get("comment_share", 0)),
                "vocab": vocab_by_author.get(author)
            })

        merged_users.sort(key=lambda u: u["comment_post_ratio"])
        return merged_users

    def interaction_graph(self) -> dict:
        """Count how often each author replied to each other author.

        Returns:
            ``{replier: {replied_to: count}}`` with an entry (possibly empty) for
            every non-null author. Self-replies, replies to unknown ids and
            replies to rows without an author are not counted.
        """
        interactions = {a: {} for a in self.df["author"].dropna().unique()}

        # reply_to refers to the comment id, this allows us to map comment ids to usernames
        id_to_author = self.df.set_index("id")["author"].to_dict()

        # Only two columns are needed, so iterate them directly instead of
        # materialising a Series per row with iterrows().
        for a, reply_id in zip(self.df["author"], self.df["reply_to"]):
            if pd.isna(a) or pd.isna(reply_id) or reply_id == "":
                continue

            b = id_to_author.get(reply_id)
            # A known parent can still have a missing (NaN) author; skipping it
            # keeps NaN from becoming a dictionary key in the result.
            if b is None or pd.isna(b) or a == b:
                continue

            interactions[a][b] = interactions[a].get(b, 0) + 1

        return interactions

View File

@@ -0,0 +1,68 @@
import pandas as pd
import re
from collections import Counter
from itertools import islice
class LinguisticAnalysis:
    """Word and n-gram frequency statistics over the ``content`` column."""

    # Lowercase a-z words of at least three letters; shared by both counters.
    _TOKEN_RE = re.compile(r"\b[a-z]{3,}\b")

    def __init__(self, df: pd.DataFrame, word_exclusions: set[str]):
        self.df = df
        self.word_exclusions = word_exclusions

    def _clean_text(self, text: str) -> str:
        """Strip URLs, HTML entities, stray "amp" and image file names.

        The patterns are case-sensitive, so ``text`` should already be lowercase.
        """
        text = re.sub(r"http\S+", "", text)  # remove URLs
        text = re.sub(r"www\S+", "", text)
        text = re.sub(r"&\w+;", "", text)  # remove HTML entities
        text = re.sub(r"\bamp\b", "", text)  # remove stray amp
        text = re.sub(r"\S+\.(jpg|jpeg|png|webp|gif)", "", text)
        return text

    def word_frequencies(self, limit: int = 100) -> list:
        """Return the ``limit`` most frequent non-excluded words.

        Returns:
            A list of ``{"word", "count"}`` records sorted by count, descending.
        """
        texts = (
            self.df["content"]
            .dropna()
            .astype(str)
            .str.lower()
        )

        words = []
        for text in texts:
            words.extend(
                w for w in self._TOKEN_RE.findall(text)
                if w not in self.word_exclusions
            )

        counts = Counter(words)

        word_frequencies = (
            pd.DataFrame(counts.items(), columns=["word", "count"])
            .sort_values("count", ascending=False)
            .head(limit)
            .reset_index(drop=True)
        )

        return word_frequencies.to_dict(orient="records")

    def ngrams(self, n=2, limit=100):
        """Return the ``limit`` most frequent word n-grams.

        Args:
            n: Number of consecutive words per n-gram.
            limit: Maximum number of records returned.

        Returns:
            A list of ``{"ngram", "count"}`` records sorted by count, descending.
        """
        # Lower-case BEFORE cleaning: the cleaning patterns are case-sensitive,
        # so "HTTP://..." or "photo.JPG" would otherwise survive and be tokenised.
        texts = (
            self.df["content"]
            .dropna()
            .astype(str)
            .str.lower()
            .apply(self._clean_text)
        )

        all_ngrams = []
        for text in texts:
            tokens = self._TOKEN_RE.findall(text)

            # stop word removal causes strange behaviors in ngrams
            #tokens = [w for w in tokens if w not in self.word_exclusions]

            # Zip n progressively shifted views of the token list into n-tuples.
            ngrams = zip(*(islice(tokens, i, None) for i in range(n)))
            all_ngrams.extend([" ".join(ng) for ng in ngrams])

        counts = Counter(all_ngrams)

        return (
            pd.DataFrame(counts.items(), columns=["ngram", "count"])
            .sort_values("count", ascending=False)
            .head(limit)
            .to_dict(orient="records")
        )

View File

@@ -9,6 +9,7 @@ from sentence_transformers import SentenceTransformer
class NLP: class NLP:
_topic_models: dict[str, SentenceTransformer] = {} _topic_models: dict[str, SentenceTransformer] = {}
_emotion_classifiers: dict[str, Any] = {} _emotion_classifiers: dict[str, Any] = {}
_entity_recognizers: dict[str, Any] = {}
_topic_embedding_cache: dict[tuple[str, ...], np.ndarray] = {} _topic_embedding_cache: dict[tuple[str, ...], np.ndarray] = {}
def __init__( def __init__(
@@ -29,6 +30,9 @@ class NLP:
self.emotion_classifier = self._get_emotion_classifier( self.emotion_classifier = self._get_emotion_classifier(
self.device_str, self.pipeline_device self.device_str, self.pipeline_device
) )
self.entity_recognizer = self._get_entity_recognizer(
self.device_str, self.pipeline_device
)
except RuntimeError as exc: except RuntimeError as exc:
if self.use_cuda and "out of memory" in str(exc).lower(): if self.use_cuda and "out of memory" in str(exc).lower():
torch.cuda.empty_cache() torch.cuda.empty_cache()
@@ -87,6 +91,27 @@ class NLP:
cls._emotion_classifiers[device_str] = classifier cls._emotion_classifiers[device_str] = classifier
return classifier return classifier
@classmethod
def _get_entity_recognizer(cls, device_str: str, pipeline_device: int) -> Any:
    """Return the token-classification pipeline cached for ``device_str``.

    The pipeline is built on first use and stored in ``cls._entity_recognizers``
    under ``device_str``; later calls with the same key reuse it.
    """
    cached = cls._entity_recognizers.get(device_str)
    if cached is not None:
        return cached

    options = {
        "aggregation_strategy": "simple",  # merges subwords
        "device": pipeline_device,
    }
    # Half precision is only requested when running on CUDA.
    if device_str == "cuda":
        options["dtype"] = torch.float16

    built = pipeline(
        "token-classification",
        model="dslim/bert-base-NER",
        **options,
    )
    cls._entity_recognizers[device_str] = built
    return built
def _encode_with_backoff( def _encode_with_backoff(
self, texts: list[str], initial_batch_size: int self, texts: list[str], initial_batch_size: int
) -> np.ndarray: ) -> np.ndarray:
@@ -129,6 +154,26 @@ class NLP:
continue continue
raise raise
def _infer_entities_with_backoff(
self, texts: list[str], initial_batch_size: int
) -> list[list[dict[str, Any]]]:
batch_size = initial_batch_size
while True:
try:
return self.entity_recognizer(texts, batch_size=batch_size)
except RuntimeError as exc:
if (
self.use_cuda
and "out of memory" in str(exc).lower()
and batch_size > 4
):
batch_size = max(4, batch_size // 2)
torch.cuda.empty_cache()
continue
raise
def add_emotion_cols(self) -> None: def add_emotion_cols(self) -> None:
texts = self.df[self.content_col].astype(str).str.slice(0, 512).tolist() texts = self.df[self.content_col].astype(str).str.slice(0, 512).tolist()
@@ -183,3 +228,51 @@ class NLP:
self.df.loc[self.df["topic_confidence"] < confidence_threshold, "topic"] = ( self.df.loc[self.df["topic_confidence"] < confidence_threshold, "topic"] = (
"Misc" "Misc"
) )
def add_ner_cols(self, max_chars: int = 512) -> None:
    """Add named-entity columns to ``self.df`` in place.

    Each text in ``self.content_col`` is truncated to ``max_chars`` characters
    (missing values become the empty string) and passed through
    ``self._infer_entities_with_backoff``.

    Columns written:
        entities: list of ``{"text": ..., "label": ...}`` dicts per row.
        entity_counts: dict mapping label to number of occurrences per row.
        entity_<label>: one integer column per label seen anywhere in the
            data, created in sorted label order so the column layout is
            the same on every run.

    Args:
        max_chars: Maximum number of characters of each text to analyse.
    """
    texts = (
        self.df[self.content_col]
        .fillna("")
        .astype(str)
        .str.slice(0, max_chars)
        .tolist()
    )

    if not texts:
        self.df["entities"] = []
        self.df["entity_counts"] = []
        return

    results = self._infer_entities_with_backoff(texts, 32 if self.use_cuda else 8)

    entity_lists = []
    entity_count_dicts = []
    for row in results:
        entities = []
        counts = {}
        for ent in row:
            word = ent.get("word")
            label = ent.get("entity_group")
            # Skip malformed predictions that lack a textual word or label.
            if isinstance(word, str) and isinstance(label, str):
                entities.append({"text": word, "label": label})
                counts[label] = counts.get(label, 0) + 1
        entity_lists.append(entities)
        entity_count_dicts.append(counts)

    self.df["entities"] = entity_lists
    self.df["entity_counts"] = entity_count_dicts

    # Expand label counts into columns. Iterating a bare set of strings gives
    # a run-dependent order, so sort to keep the column order stable.
    all_labels = set()
    for counts in entity_count_dicts:
        all_labels.update(counts)

    for label in sorted(all_labels):
        self.df[f"entity_{label}"] = [
            counts.get(label, 0) for counts in entity_count_dicts
        ]

View File

@@ -0,0 +1,70 @@
import pandas as pd
class TemporalAnalysis:
    """Time-based statistics over a DataFrame of posts and comments.

    The methods read the columns ``id``, ``type``, ``reply_to``, ``dt``,
    ``date``, ``weekday``, ``hour`` and the ``emotion_*`` score columns.
    None of the methods modify the wrapped DataFrame.
    """

    def __init__(self, df: pd.DataFrame):
        self.df = df

    def avg_reply_time_per_emotion(self) -> list[dict]:
        """Average reply delay in seconds, grouped by the reply's dominant emotion.

        A reply is a row with ``type == "comment"`` and a non-empty
        ``reply_to``. Its delay is ``dt`` minus the ``dt`` of the row whose
        ``id`` equals ``reply_to``; replies whose parent is not in the data
        are left out. ``emotion_neutral`` and ``emotion_surprise`` are never
        considered as the dominant emotion.

        Returns:
            Records of the form
            ``{"dominant_emotion": ..., "mean": ..., "count": ...}``, or an
            empty list when there is nothing to aggregate.
        """
        df = self.df
        emotion_cols = [
            col for col in df.columns
            if col.startswith("emotion_")
            and col not in ("emotion_neutral", "emotion_surprise")
        ]

        is_reply = (
            (df["type"] == "comment")
            & df["reply_to"].notna()
            & (df["reply_to"] != "")
        )
        replies = df[is_reply]
        # apply(axis=1) on an empty frame returns a DataFrame, not a Series,
        # and idxmax needs at least one column, so bail out early.
        if replies.empty or not emotion_cols:
            return []

        id_to_time = df.set_index("id")["dt"].to_dict()

        def compute_reply_time(row):
            parent_time = id_to_time.get(row["reply_to"])
            if parent_time is None:
                return None
            return (row["dt"] - parent_time).total_seconds()

        reply_time = replies.apply(compute_reply_time, axis=1).astype(float)

        # Build a fresh two-column frame instead of writing into a slice of
        # the source frame, and drop replies with an unknown parent so that
        # no NaN means end up in the output.
        timed = pd.DataFrame({
            "dominant_emotion": replies[emotion_cols].idxmax(axis=1).to_numpy(),
            "reply_time": reply_time.to_numpy(),
        }).dropna(subset=["reply_time"])
        if timed.empty:
            return []

        grouped = (
            timed
            .groupby("dominant_emotion")["reply_time"]
            .agg(["mean", "count"])
            .reset_index()
        )
        return grouped.to_dict(orient="records")

    def posts_per_day(self) -> list[dict]:
        """Count rows per value of the ``date`` column.

        Returns:
            Records of the form ``{"date": ..., "count": ...}``.
        """
        per_day = (
            self.df.groupby("date")
            .size()
            .reset_index(name="count")
        )
        return per_day.to_dict(orient="records")

    def heatmap(self) -> list[dict]:
        """Count rows per weekday and hour of day.

        Returns:
            One record per weekday present in the data, ordered Monday to
            Sunday, mapping the hour as a string (``"0"`` .. ``"23"``) to the
            number of rows in that hour.

        NOTE(review): the weekday label is the index of the intermediate
        table and is dropped by ``to_dict(orient="records")``; because
        weekdays without data are omitted, consumers cannot tell which
        weekday a record belongs to. Confirm with the frontend before
        changing the shape.
        """
        weekday_order = [
            "Monday", "Tuesday", "Wednesday",
            "Thursday", "Friday", "Saturday", "Sunday"
        ]
        weekday = pd.Categorical(
            self.df["weekday"],
            categories=weekday_order,
            ordered=True
        )

        # Work on a small derived frame so the shared DataFrame keeps its
        # original ``weekday`` column.
        counts = (
            self.df[["hour"]]
            .assign(weekday=weekday)
            .groupby(["weekday", "hour"], observed=True)
            .size()
            .unstack(fill_value=0)
            .reindex(columns=range(24), fill_value=0)
        )
        counts.columns = counts.columns.map(str)
        return counts.to_dict(orient="records")

View File

@@ -12,7 +12,7 @@ app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": "http://localhost:5173"}}) CORS(app, resources={r"/*": {"origins": "http://localhost:5173"}})
# Global State # Global State
posts_df = pd.read_json('posts.jsonl', lines=True) posts_df = pd.read_json('small.jsonl', lines=True)
with open("topic_buckets.json", "r", encoding="utf-8") as f: with open("topic_buckets.json", "r", encoding="utf-8") as f:
domain_topics = json.load(f) domain_topics = json.load(f)
stat_obj = StatGen(posts_df, domain_topics) stat_obj = StatGen(posts_df, domain_topics)
@@ -47,7 +47,7 @@ def get_dataset():
if stat_obj is None: if stat_obj is None:
return jsonify({"error": "No data uploaded"}), 400 return jsonify({"error": "No data uploaded"}), 400
return jsonify(stat_obj.df.to_dict(orient="records")), 200 return stat_obj.df.to_json(orient="records"), 200, {"Content-Type": "application/json"}
@app.route('/stats/content', methods=['GET']) @app.route('/stats/content', methods=['GET'])
def word_frequencies(): def word_frequencies():

View File

@@ -1,11 +1,13 @@
import pandas as pd import pandas as pd
import re
import nltk
import datetime import datetime
import nltk
from nltk.corpus import stopwords from nltk.corpus import stopwords
from collections import Counter from server.analysis.nlp import NLP
from server.nlp import NLP from server.analysis.temporal import TemporalAnalysis
from server.analysis.emotional import EmotionalAnalysis
from server.analysis.interactional import InteractionAnalysis
from server.analysis.linguistic import LinguisticAnalysis
DOMAIN_STOPWORDS = { DOMAIN_STOPWORDS = {
"www", "https", "http", "www", "https", "http",
@@ -23,6 +25,7 @@ EXCLUDE_WORDS = set(stopwords.words('english')) | DOMAIN_STOPWORDS
class StatGen: class StatGen:
def __init__(self, df: pd.DataFrame, domain_topics: dict) -> None: def __init__(self, df: pd.DataFrame, domain_topics: dict) -> None:
comments_df = df[["id", "comments"]].explode("comments") comments_df = df[["id", "comments"]].explode("comments")
comments_df = comments_df[comments_df["comments"].apply(lambda x: isinstance(x, dict))]
comments_df = pd.json_normalize(comments_df["comments"]) comments_df = pd.json_normalize(comments_df["comments"])
posts_df = df.drop(columns=["comments"]) posts_df = df.drop(columns=["comments"])
@@ -35,9 +38,15 @@ class StatGen:
self.df = pd.concat([posts_df, comments_df]) self.df = pd.concat([posts_df, comments_df])
self.df.drop(columns=["post_id"], inplace=True, errors="ignore") self.df.drop(columns=["post_id"], inplace=True, errors="ignore")
self.nlp = NLP(self.df, "title", "content", domain_topics) self.nlp = NLP(self.df, "title", "content", domain_topics)
self._add_extra_cols(self.df) self._add_extra_cols(self.df)
self.temporal_analysis = TemporalAnalysis(self.df)
self.emotional_analysis = EmotionalAnalysis(self.df)
self.interaction_analysis = InteractionAnalysis(self.df, EXCLUDE_WORDS)
self.linguistic_analysis = LinguisticAnalysis(self.df, EXCLUDE_WORDS)
self.original_df = self.df.copy(deep=True) self.original_df = self.df.copy(deep=True)
## Private Methods ## Private Methods
@@ -50,141 +59,29 @@ class StatGen:
self.nlp.add_emotion_cols() self.nlp.add_emotion_cols()
self.nlp.add_topic_col() self.nlp.add_topic_col()
self.nlp.add_ner_cols()
def _tokenize(self, text: str):
    """Return the lowercase ASCII words of 3+ letters in ``text`` that are not excluded.

    Only runs of ``a``-``z`` are matched, so callers are expected to pass
    text that has already been lowercased.
    """
    candidates = re.findall(r"\b[a-z]{3,}\b", text)
    return list(filter(lambda token: token not in EXCLUDE_WORDS, candidates))
def _vocab_richness_per_user(self, min_words: int = 20, top_most_used_words: int = 100) -> list:
df = self.df.copy()
df["content"] = df["content"].fillna("").astype(str).str.lower()
df["tokens"] = df["content"].apply(self._tokenize)
rows = []
for author, group in df.groupby("author"):
all_tokens = [t for tokens in group["tokens"] for t in tokens]
total_words = len(all_tokens)
unique_words = len(set(all_tokens))
events = len(group)
# Min amount of words for a user, any less than this might give weird results
if total_words < min_words:
continue
# 100% = they never reused a word (excluding stop words)
vocab_richness = unique_words / total_words
avg_words = total_words / max(events, 1)
counts = Counter(all_tokens)
top_words = [
{"word": w, "count": int(c)}
for w, c in counts.most_common(top_most_used_words)
]
rows.append({
"author": author,
"events": int(events),
"total_words": int(total_words),
"unique_words": int(unique_words),
"vocab_richness": round(vocab_richness, 3),
"avg_words_per_event": round(avg_words, 2),
"top_words": top_words
})
rows = sorted(rows, key=lambda x: x["vocab_richness"], reverse=True)
return rows
def _interaction_graph(self):
interactions = {a: {} for a in self.df["author"].dropna().unique()}
# reply_to refers to the comment id, this allows us to map comment ids to usernames
id_to_author = self.df.set_index("id")["author"].to_dict()
for _, row in self.df.iterrows():
a = row["author"]
reply_id = row["reply_to"]
if pd.isna(a) or pd.isna(reply_id) or reply_id == "":
continue
b = id_to_author.get(reply_id)
if b is None or a == b:
continue
interactions[a][b] = interactions[a].get(b, 0) + 1
return interactions
def _avg_reply_time_per_emotion(self):
df = self.df.copy()
replies = df[
(df["type"] == "comment") &
(df["reply_to"].notna()) &
(df["reply_to"] != "")
]
id_to_time = df.set_index("id")["dt"].to_dict()
def compute_reply_time(row):
reply_id = row["reply_to"]
parent_time = id_to_time.get(reply_id)
if parent_time is None:
return None
return (row["dt"] - parent_time).total_seconds()
replies["reply_time"] = replies.apply(compute_reply_time, axis=1)
emotion_cols = [col for col in df.columns if col.startswith("emotion_") and col not in ("emotion_neutral", "emotion_surprise")]
replies["dominant_emotion"] = replies[emotion_cols].idxmax(axis=1)
grouped = (
replies
.groupby("dominant_emotion")["reply_time"]
.agg(["mean", "count"])
.reset_index()
)
return grouped.to_dict(orient="records")
## Public ## Public
def time_analysis(self) -> pd.DataFrame: def time_analysis(self) -> pd.DataFrame:
per_day = (
self.df.groupby("date")
.size()
.reset_index(name="count")
)
weekday_order = [
"Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday", "Sunday"
]
self.df["weekday"] = pd.Categorical(
self.df["weekday"],
categories=weekday_order,
ordered=True
)
heatmap = (
self.df
.groupby(["weekday", "hour"], observed=True)
.size()
.unstack(fill_value=0)
.reindex(columns=range(24), fill_value=0)
)
heatmap.columns = heatmap.columns.map(str)
burst_index = per_day["count"].std() / max(per_day["count"].mean(), 1)
return { return {
"events_per_day": per_day.to_dict(orient="records"), "events_per_day": self.temporal_analysis.posts_per_day(),
"weekday_hour_heatmap": heatmap.to_dict(orient="records"), "weekday_hour_heatmap": self.temporal_analysis.heatmap()
"burstiness": round(burst_index, 2) }
def content_analysis(self) -> dict:
    """Assemble the content statistics from the specialised analysis objects.

    Returns:
        A dict with word frequencies, common two- and three-word phrases,
        the average emotion per topic and the reply time per emotion.
    """
    linguistic = self.linguistic_analysis

    report = {}
    report["word_frequencies"] = linguistic.word_frequencies()
    report["common_two_phrases"] = linguistic.ngrams()
    report["common_three_phrases"] = linguistic.ngrams(n=3)
    report["average_emotion_by_topic"] = self.emotional_analysis.avg_emotion_by_topic()
    report["reply_time_by_emotion"] = self.temporal_analysis.avg_reply_time_per_emotion()
    return report
def user_analysis(self) -> dict:
return {
"top_users": self.interaction_analysis.top_users(),
"users": self.interaction_analysis.per_user_analysis(),
"interaction_graph": self.interaction_analysis.interaction_graph()
} }
def summary(self) -> dict: def summary(self) -> dict:
@@ -207,122 +104,6 @@ class StatGen:
"sources": self.df["source"].dropna().unique().tolist() "sources": self.df["source"].dropna().unique().tolist()
} }
def content_analysis(self, limit: int = 100) -> dict:
    """Summarise word usage and per-topic emotion for the current data.

    Args:
        limit: Maximum number of entries in the word-frequency table.

    Returns:
        A dict with the most frequent non-excluded words, the mean emotion
        scores per topic (the ``"Misc"`` topic is left out, and each record
        carries the topic size as ``n``) and the reply time per emotion.
    """
    word_pattern = re.compile(r"\b[a-z]{3,}\b")
    lowered = self.df["content"].dropna().astype(str).str.lower()

    tally = Counter(
        token
        for text in lowered
        for token in word_pattern.findall(text)
        if token not in EXCLUDE_WORDS
    )
    top_words = (
        pd.DataFrame(tally.items(), columns=["word", "count"])
        .sort_values("count", ascending=False)
        .head(limit)
        .reset_index(drop=True)
    )

    skipped_emotions = ("emotion_neutral", "emotion_surprise")
    emotion_cols = [
        name for name in self.df.columns
        if name.startswith("emotion_") and name not in skipped_emotions
    ]

    # Both aggregates are taken over the same topic grouping.
    on_topic = self.df[self.df["topic"] != "Misc"]
    by_topic = on_topic.groupby("topic")
    topic_sizes = by_topic.size().rename("n")
    topic_means = by_topic[emotion_cols].mean().reset_index()
    emotion_by_topic = topic_means.merge(topic_sizes, on="topic")

    return {
        "word_frequencies": top_words.to_dict(orient='records'),
        "average_emotion_by_topic": emotion_by_topic.to_dict(orient='records'),
        "reply_time_by_emotion": self._avg_reply_time_per_emotion()
    }
def user_analysis(self) -> dict:
    """Summarise activity per author.

    Returns:
        A dict with ``top_users`` (row counts per author and source, most
        active first), ``users`` (post/comment counts, ratios and vocabulary
        statistics per author, ordered by comment-to-post ratio) and
        ``interaction_graph``.
    """
    activity = (
        self.df.groupby(["author", "source"])
        .size()
        .sort_values(ascending=False)
    )
    top_users = []
    for (author, source), total in activity.items():
        top_users.append({"author": author, "source": source, "count": int(total)})

    by_type = self.df.groupby(["author", "type"]).size().unstack(fill_value=0)
    # Make sure both columns exist even when one type never occurs.
    for kind in ("post", "comment"):
        if kind not in by_type.columns:
            by_type[kind] = 0

    posts = by_type["post"]
    comments = by_type["comment"]
    # A zero denominator is replaced by 1 to avoid dividing by zero.
    by_type["comment_post_ratio"] = comments / posts.replace(0, 1)
    by_type["comment_share"] = comments / (posts + comments).replace(0, 1)
    by_type = by_type.sort_values("comment_post_ratio", ascending=True)

    vocab_lookup = {}
    for entry in self._vocab_richness_per_user():
        vocab_lookup[entry["author"]] = entry

    # Combine the activity ratios with the vocabulary statistics.
    users = [
        {
            "author": record["author"],
            "post": int(record.get("post", 0)),
            "comment": int(record.get("comment", 0)),
            "comment_post_ratio": float(record.get("comment_post_ratio", 0)),
            "comment_share": float(record.get("comment_share", 0)),
            "vocab": vocab_lookup.get(record["author"]),
        }
        for record in by_type.reset_index().to_dict(orient="records")
    ]
    users.sort(key=lambda user: user["comment_post_ratio"])

    return {
        "top_users": top_users,
        "users": users,
        "interaction_graph": self._interaction_graph()
    }
def search(self, search_query: str) -> dict: def search(self, search_query: str) -> dict:
self.df = self.df[ self.df = self.df[
self.df["content"].str.contains(search_query) self.df["content"].str.contains(search_query)