% Twitter-STMHD dataset paper, ICWSM 2022 (volume 16, number 1).
% The citation key is kept exactly as exported so existing \cite commands keep resolving.
% NOTE(review): the first author was exported with an empty family name (", Suhavi");
% it is entered below as a single-name author -- confirm against the published paper.
% The title braces Twitter-STMHD so sentence-casing styles keep its capitalisation.
@article{_Singh_Arora_Shrivastava_Singh_Shah_Kumaraguru_2022,
  author       = {{Suhavi} and Singh, Asmit Kumar and Arora, Udit and Shrivastava, Somyadeep and Singh, Aryaveer and Shah, Rajiv Ratn and Kumaraguru, Ponnurangam},
  title        = {{Twitter-STMHD}: An Extensive User-Level Database of Multiple Mental Health Disorders},
  journal      = {Proceedings of the International AAAI Conference on Web and Social Media},
  volume       = {16},
  number       = {1},
  pages        = {1182--1191},
  year         = {2022},
  month        = may,
  doi          = {10.1609/icwsm.v16i1.19368},
  url          = {https://ojs.aaai.org/index.php/ICWSM/article/view/19368},
  abstractNote = {Social Media is equipped with the ability to track and quantify user behavior, establishing it as an appropriate resource for mental health studies. However, previous efforts in the area have been limited by the lack of data and contextually relevant information. There is a need for large-scale, well-labeled mental health datasets with fast reproducible methods to facilitate their heuristic growth. In this paper, we cater to this need by building the Twitter - Self-Reported Temporally-Contextual Mental Health Diagnosis Dataset (Twitter-STMHD), a large scale, user-level dataset grouped into 8 disorder categories and a companion class of control users. The dataset is 60% hand-annotated, which lead to the creation of high-precision self-reported diagnosis report patterns, used for the construction of the rest of the dataset. The dataset, instead of being a corpus of tweets, is a collection of user-profiles of those suffering from mental health disorders to provide a holistic view of the problem statement. By leveraging temporal information, the data for a given profile in the dataset has been collected for disease prevalence periods: onset of disorder, diagnosis and progression, along with a fourth period: COVID-19. This is the only and the largest dataset that captures the tweeting activity of users suffering from mental health disorders during the COVID-19 period.},
}