Finish off the links between frontend and backend #10
@@ -61,3 +61,19 @@ class LinguisticAnalysis:
|
|||||||
.head(limit)
|
.head(limit)
|
||||||
.to_dict(orient="records")
|
.to_dict(orient="records")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def lexical_diversity(self, df: pd.DataFrame) -> dict:
    """Compute type-token-ratio statistics over the ``content`` column.

    Tokens are lowercase alphabetic words of length >= 2 (regex
    ``\\b[a-z]{2,}\\b``); words listed in ``self.word_exclusions`` are
    dropped before counting.

    Returns:
        dict with:
        - ``total_tokens``: number of kept tokens (clamped to >= 1 so an
          empty corpus yields ttr 0.0 instead of a ZeroDivisionError)
        - ``unique_tokens``: number of distinct kept tokens
        - ``ttr``: unique / total, rounded to 4 decimal places
    """
    tokens = (
        df["content"].fillna("").astype(str).str.lower()
        .str.findall(r"\b[a-z]{2,}\b")
        .explode()
        # BUG FIX: rows whose content matched no words explode to NaN;
        # without dropna() those NaN entries pass the isin() exclusion
        # filter and inflate len(tokens), deflating the reported TTR.
        .dropna()
    )
    tokens = tokens[~tokens.isin(self.word_exclusions)]
    # Clamp the denominator so an empty token set is well-defined.
    total = max(len(tokens), 1)
    unique = int(tokens.nunique())
    return {
        "total_tokens": total,
        "unique_tokens": unique,
        "ttr": round(unique / total, 4),
    }
|
|||||||
@@ -94,6 +94,7 @@ class StatGen:
|
|||||||
"word_frequencies": self.linguistic_analysis.word_frequencies(filtered_df),
|
"word_frequencies": self.linguistic_analysis.word_frequencies(filtered_df),
|
||||||
"common_two_phrases": self.linguistic_analysis.ngrams(filtered_df),
|
"common_two_phrases": self.linguistic_analysis.ngrams(filtered_df),
|
||||||
"common_three_phrases": self.linguistic_analysis.ngrams(filtered_df, n=3),
|
"common_three_phrases": self.linguistic_analysis.ngrams(filtered_df, n=3),
|
||||||
|
"lexical_diversity": self.linguistic_analysis.lexical_diversity(filtered_df)
|
||||||
}
|
}
|
||||||
|
|
||||||
def emotional(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
def emotional(self, df: pd.DataFrame, filters: dict | None = None) -> dict:
|
||||||
|
|||||||
Reference in New Issue
Block a user