2026
WWW'26 coTherapist:
A Behavior-Aligned Small Language Model to Support Mental Healthcare Experts
The Web Conference'26 | January, 2026
@inproceedings{10.1145/3774904.3792988,
  author    = {Adhikary, Prottay Kumar and Rawat, Reena and Chakraborty, Tanmoy},
  title     = {{coTherapist}: A Behavior-Aligned Small Language Model Framework to Support
               Mental Healthcare Experts},
  booktitle = {Proceedings of the {ACM} Web Conference 2026},
  series    = {WWW '26},
  year      = {2026},
  pages     = {9529--9540},
  numpages  = {12},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {United Arab Emirates},
  isbn      = {9798400723070},
  doi       = {10.1145/3774904.3792988},
  url       = {https://doi.org/10.1145/3774904.3792988},
  keywords  = {mental health care, small language models, retrieval augmented generation,
               agentic framework, web-based interaction},
  abstract  = {Access to mental healthcare is increasingly strained by workforce shortages
               and rising demand, motivating the development of intelligent systems that can support
               mental healthcare experts. We introduce coTherapist, a unified framework utilizing a
               small language model to emulate core therapeutic competencies through domain-specific
               fine-tuning, retrieval augmentation, and agentic reasoning. Evaluation on clinical
               queries demonstrates that coTherapist generates more relevant and clinically grounded
               responses than contemporary baselines. Using our novel T-BARS rubric and psychometric
               profiling, we confirm coTherapist exhibits high empathy and therapist-consistent
               personality traits. Furthermore, human evaluation by domain experts validates that
               coTherapist delivers accurate, trustworthy, and safe responses. coTherapist was deployed
               and tested by clinical experts. Collectively, these findings demonstrate that small
               models can be engineered to exhibit expert-like behavior, offering a scalable pathway
               for digital mental health tools.}
}
2025
Preprint A
Comprehensive Review of Datasets for Clinical Mental Health AI Systems
arXiv | August, 2025
@misc{mandal2025mentalhealthdatasets,
  author        = {Mandal, Aishik and Adhikary, Prottay Kumar and Arnaout, Hiba and
                   Gurevych, Iryna and Chakraborty, Tanmoy},
  title         = {A Comprehensive Review of Datasets for Clinical Mental Health {AI}
                   Systems},
  year          = {2025},
  eprint        = {2508.09809},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2508.09809}
}
Preprint Towards Richer
AI-Assisted Psychotherapy Note-Making and Performance Benchmarking
medRxiv | June, 2025
@article{Adhikary2025.06.25.25330252,
  author       = {Adhikary, Prottay Kumar and Singh, Sahajpreet and Singh, Suruchi and Sharma,
                  Panna and Soni, Pankhuri and Choudhary, Rashmi and Saxena, Charu and Chauhan, Prachi and
                  Gupta, Swati Kedia and Deb, Koushik Sinha and Singh, Salam Michael and Chakraborty,
                  Tanmoy},
  title        = {Towards Richer {AI}-Assisted Psychotherapy Note-Making and Performance
                  Benchmarking},
  journal      = {medRxiv},
  year         = {2025},
  elocation-id = {2025.06.25.25330252},
  publisher    = {Cold Spring Harbor Laboratory Press},
  doi          = {10.1101/2025.06.25.25330252},
  url          = {https://www.medrxiv.org/content/early/2025/06/25/2025.06.25.25330252},
  eprint       = {https://www.medrxiv.org/content/early/2025/06/25/2025.06.25.25330252.full.pdf},
  code         = {https://github.com/proadhikary/iCARE},
  abstract     = {Psychotherapy note-making is crucial for effective patient care. However,
                  traditional formats such as SOAP (Subjective, Objective, Assessment, and Plan) and BIRP
                  (Behavior, Intervention, Response, and Plan) often fail to capture the nuanced
                  complexities of therapeutic sessions, as they primarily focus on surface-level details
                  and lack a comprehensive understanding of the patient{\textquoteright}s history, mental
                  status, and therapeutic process. While recent advances in Artificial Intelligence (AI)
                  and Large Language Models (LLMs) show promise in clinical documentation, their
                  application in psychotherapy note summarisation remains unexplored. We present iCARE
                  (identifiers, Chief Concerns and Clinical History, Assessment and Analysis, Risk and
                  Crisis, Engagement and Next Steps), a comprehensive framework for AI-assisted
                  psychotherapy documentation that addresses these limitations. iCARE comprises of 17
                  clinically relevant aspects, developed collaboratively with mental health professionals,
                  and aligned with established guidelines. We further introduce PATH (Psychotherapy
                  Aspects and Treatment History summary), a novel dataset of annotated therapy sessions.
                  Through extensive benchmarking with 11 LLMs, including both open and closed-source
                  models, we evaluate their performance across different note-taking aspects using
                  automatic and human evaluation metrics. Our results show that closed-source models like
                  Gemini Pro and GPT4o-mini excel in various aspects, with Gemini Pro achieving superior
                  human evaluation scores. Notably, all models struggle with temporal reasoning and
                  complex therapeutic interpretations. The findings suggest that current LLMs can assist
                  in basic documentation but require improvements in handling longitudinal therapeutic
                  relationships and aspects that require deeper clinical understanding and interpretative
                  reasoning. This work advances mental health care documentation while emphasising the
                  need for continued clinical expertise in psychotherapy note summarisation.}
}
Journal Article Menstrual
Health Education Using a Specialized Large Language Model in India: Development and
Evaluation Study of MenstLLaMA
Journal of Medical Internet Research | July, 2025
@article{info:doi/10.2196/71977,
  author   = {Adhikary, Prottay Kumar and Motiyani, Isha and Oke, Gayatri and
              Joshi, Maithili and Pathak, Kanupriya and Singh, Salam Michael and
              Chakraborty, Tanmoy},
  title    = {Menstrual Health Education Using a Specialized Large Language Model in {India}:
              Development and Evaluation Study of {MenstLLaMA}},
  journal  = {Journal of Medical Internet Research},
  year     = {2025},
  month    = jul,
  day      = {16},
  volume   = {27},
  pages    = {e71977},
  issn     = {1438-8871},
  doi      = {10.2196/71977},
  url      = {https://www.jmir.org/2025/1/e71977},
  keywords = {menstrual health education; artificial intelligence; large language model;
              cultural sensitivity; health equity; digital health},
  abstract = {Background: The quality and accessibility of menstrual health education
              (MHE) in low- and middle-income countries, including India, remain inadequate due to
              persistent challenges (eg, poverty, social stigma, and gender inequality). While
              community-driven initiatives have sought to raise awareness, artificial intelligence
              offers a scalable and efficient solution for disseminating accurate information.
              However, existing general-purpose large language models (LLMs) are often ill-suited
              for this task, tending to exhibit low accuracy, cultural insensitivity, and overly
              complex responses. To address these limitations, we developed MenstLLaMA---a
              specialized LLM tailored to the Indian context and designed to deliver MHE
              empathetically, supportively, and accessibly. Objective: We aimed to develop and
              evaluate MenstLLaMA---a specialized LLM tailored to deliver accurate, culturally
              sensitive MHE---and assess its effectiveness in comparison to existing
              general-purpose models. Methods: We curated MENST---a novel, domain-specific dataset
              comprising 23,820 question-answer pairs aggregated from medical websites, government
              portals, and health education resources. This dataset was systematically annotated
              with metadata capturing age groups, regions, topics, and sociocultural contexts.
              MenstLLaMA was developed by fine-tuning Meta-LLaMA-3-8B-Instruct, using
              parameter-efficient fine-tuning with low-rank adaptation to achieve domain alignment
              while minimizing computational overhead. We benchmarked MenstLLaMA against 9
              state-of-the-art general-purpose LLMs, including GPT-4o, Claude-3, Gemini 1.5 Pro,
              and Mistral. The evaluation followed a multilayered framework: (1) automatic
              evaluation using standard natural language processing metrics (BLEU [Bilingual
              Evaluation Understudy], METEOR [Metric for Evaluation of Translation with Explicit
              Ordering], ROUGE-L [Recall-Oriented Understudy for Gisting Evaluation-Longest Common
              Subsequence], and BERTScore [Bidirectional Encoder Representations from Transformers
              Score]); (2) evaluation by clinical experts (N=18), who rated 200 expert-curated
              queries for accuracy and appropriateness; (3) medical practitioner interaction
              through the ISHA (Intelligent System for Menstrual Health Assistance) interactive
              chatbot, assessing qualitative dimensions (eg, relevance, understandability,
              preciseness, correctness, and context sensitivity); and (4) a user study with
              volunteer participants (N=200), who evaluated MenstLLaMA in 15- to 20-minute
              randomized sessions, rating the system across 7 qualitative user satisfaction
              metrics. Results: MenstLLaMA achieved the highest scores in BLEU (0.059) and
              BERTScore (0.911), outperforming GPT-4o (BLEU: 0.052, BERTScore: 0.896) and Claude-3
              (BERTScore: 0.888). Clinical experts preferred MenstLLaMA's responses over
              gold-standard answers in several culturally sensitive cases. In medical
              practitioners' evaluations using the ISHA---the chat interface powered by
              MenstLLaMA---the model scored 3.5 in relevance, 3.6 in understandability, 3.1/5 in
              preciseness, 3.5/5 in correctness, and 4.0/5 in context sensitivity. User
              evaluations indicated even stronger results, with ratings of 4.7/5 for
              understandability, 4.3/5 for relevance, 4.28/5 for preciseness, 4.1/5 for
              correctness, 4.6/5 for tone, 4.2/5 for flow, and 3.9/5 for context sensitivity.
              Conclusions: MenstLLaMA demonstrates exceptional accuracy, empathy, and user
              satisfaction within the domain of MHE, bridging critical gaps left by
              general-purpose LLMs. Its potential for integration into broader health education
              platforms positions it as a transformative tool for menstrual well-being. Future
              research could explore its long-term impact on public perception and menstrual
              hygiene practices, while expanding demographic representation, enhancing context
              sensitivity, and integrating multimodal and voice-based interactions to improve
              accessibility across diverse user groups.}
}
2024
Journal Article Exploring
the Efficacy of Large Language Models in Summarizing Mental Health Counseling
Sessions: Benchmark Study
JMIR Mental Health | February, 2024
@article{adhikary2024exploring,
  author    = {Adhikary, Prottay Kumar and Srivastava, Aseem and Kumar, Shivani and Singh,
               Salam Michael and Manuja, Puneet and Gopinath, Jini K and Krishnan, Vijay and Gupta,
               Swati Kedia and Deb, Koushik Sinha and Chakraborty, Tanmoy},
  title     = {Exploring the Efficacy of Large Language Models in Summarizing Mental Health
               Counseling Sessions: Benchmark Study},
  journal   = {JMIR Mental Health},
  volume    = {11},
  pages     = {e57306},
  year      = {2024},
  publisher = {JMIR Publications},
  address   = {Toronto, Canada},
  doi       = {10.2196/57306}
}
2023
Conference Paper TRAVID: An
End-to-End Video Translation Framework
IJCNLP-AACL 2023, Bali, Indonesia | November, 2023
@inproceedings{adhikary-EtAl:2023:ijcnlp,
  author    = {Adhikary, Prottay Kumar and Sugandhi, Bandaru and Ghimire, Subhojit and
               Pal, Santanu and Pakray, Partha},
  title     = {{TRAVID}: An End-to-End Video Translation Framework},
  booktitle = {Proceedings of the 13th International Joint Conference on Natural Language
               Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association
               for Computational Linguistics: System Demonstrations},
  month     = nov,
  year      = {2023},
  address   = {Bali, Indonesia},
  publisher = {Asian Federation of Natural Language Processing},
  pages     = {1--9}
}
Conference Paper CNLP-NITS
at SemEval-2023 Task 10: Online sexism prediction, PREDHATE!
The 17th International Workshop on Semantic Evaluation
(SemEval-2023) | July, 2023
@inproceedings{vetagiri-etal-2023-cnlp,
  author    = {Vetagiri, Advaitha and Adhikary, Prottay and Pakray, Partha and Das, Amitava},
  title     = {{CNLP}-{NITS} at {S}em{E}val-2023 Task 10: Online sexism prediction,
               {PREDHATE}!},
  booktitle = {Proceedings of the 17th International Workshop on Semantic Evaluation
               (SemEval-2023)},
  year      = {2023},
  month     = jul,
  pages     = {815--822},
  publisher = {Association for Computational Linguistics},
  address   = {Toronto, Canada},
  doi       = {10.18653/v1/2023.semeval-1.113},
  url       = {https://aclanthology.org/2023.semeval-1.113},
  abstract  = {Online sexism is a rising issue that threatens women{'}s safety, fosters
               hostile situations, and upholds social inequities. We describe a task SemEval-2023
               Task 10 for creating English-language models that can precisely identify and
               categorize sexist content on internet forums and social platforms like Gab and
               Reddit as well to provide an explainability in order to address this problem. The
               problem is divided into three hierarchically organized subtasks: binary sexism
               detection, sexism by category, and sexism by fine-grained vector. The dataset
               consists of 20,000 labelled entries. For Task A, pertained models like Convolutional
               Neural Network (CNN) and Bidirectional Long Short-Term Memory (BiLSTM), which is
               called CNN-BiLSTM and Generative Pretrained Transformer 2 (GPT-2) models were used,
               as well as the GPT-2 model for Task B and C, and have provided experimental
               configurations. According to our findings, the GPT-2 model performs better than the
               CNN-BiLSTM model for Task A, while GPT-2 is highly accurate for Tasks B and C on the
               training, validation and testing splits of the training data provided in the task.
               Our proposed models allow researchers to create more precise and understandable
               models for identifying and categorizing sexist content in online forums, thereby
               empowering users and moderators.}
}
Conference Paper Leveraging
GPT-2
for automated classification of online sexist content
Working Notes of the Conference and Labs of the Evaluation
Forum (CLEF 2023) | May, 2023
@inproceedings{vetagiri2023leveraging,
  author    = {Vetagiri, Advaitha and Adhikary, Prottay Kumar and Pakray, Partha and Das,
               Amitava},
  title     = {Leveraging {GPT-2} for automated classification of online sexist content},
  booktitle = {Working Notes of the Conference and Labs of the Evaluation Forum ({CLEF} 2023)},
  year      = {2023}
}
Journal Dzongkha Handwritten
Digit Recognition using Machine Learning Techniques
Procedia Computer Science | January, 2023
@article{ADHIKARY20232350,
  author   = {Adhikary, Prottay Kumar and Dadure, Pankaj and Saha, Pradipta and {Tawmo} and
              Pakray, Partha},
  title    = {{Dzongkha} Handwritten Digit Recognition using Machine Learning Techniques},
  journal  = {Procedia Computer Science},
  volume   = {218},
  pages    = {2350--2358},
  year     = {2023},
  note     = {International Conference on Machine Learning and Data Engineering},
  issn     = {1877-0509},
  doi      = {10.1016/j.procs.2023.01.210},
  url      = {https://www.sciencedirect.com/science/article/pii/S1877050923002107},
  keywords = {Dzongkha Language, Character Recognition, Digit Recognition, Handwritten
              Characters, Machine Learning},
  abstract = {Handwritten digit recognition has recently gained importance, attracting
              many researchers due to its use in various machine learning and computer vision
              applications. As technology and science progressing, there is a need for a system to
              recognize the handwritten script in several real-time applications to reduce human
              effort. There a lot of work has been done on the recognition and generation of
              handwritten digits of high-resource languages such as English. However, insufficient
              work has been done on Dzongkha digits recognition, as Dzongkha digits are
              low-resource and more complex than English patterns. This paper aims to perform
              handwritten character recognition of Dzongkha digit using several machine learning
              techniques. The unavailability of the Dzongkha handwritten digit dataset is the
              prime motivation behind this work. To facilitates the recognition of Dzongkha
              handwritten digit, we have collected the data of Dzongkha handwritten digit from
              indigenous and non-indigenous people of Bhutan and provided the dataset for further
              research. Moreover, we have used several machine algorithms, including a support
              vector machine, K-nearest neighbor, and decision tree. Among these algorithms, the
              support vector machine classification algorithm has achieved a remarkable result
              with an accuracy of 98.29\%.}
}
2022
Journal Investigation of
negation effect for En-As machine translation
Sādhanā | November, 2022
@article{Laskar2022,
  author   = {Laskar, Sahinur Rahman and Gogoi, Abinash and Dutta, Samudranil and
              Adhikary, Prottay Kumar and Nath, Prachurya and Pakray, Partha and
              Bandyopadhyay, Sivaji},
  title    = {Investigation of negation effect for {English--Assamese} machine translation},
  journal  = {S{\={a}}dhan{\={a}}},
  year     = {2022},
  month    = nov,
  day      = {14},
  volume   = {47},
  number   = {4},
  pages    = {238},
  issn     = {0973-7677},
  doi      = {10.1007/s12046-022-01965-5},
  url      = {https://doi.org/10.1007/s12046-022-01965-5},
  abstract = {Computational linguistics deals with the computational modelling of
              natural languages, in which machine translation is a popular task. The aim of
              machine translation is to automatically translate one natural language into another,
              which minimizes the linguistic barrier of different linguistic backgrounds. The
              data-driven approach of machine translation, namely, neural machine translation
              achieves state-of-the-art results on different language pairs, however it needs a
              sufficient amount of parallel training data to attain reasonable translation
              performance. In this work, we have explored different machine translation models on
              a low-resource English--Assamese language pair and investigated different sources of
              errors, particularly due to negation in English-to-Assamese and Assamese-to-English
              translation. Negation is a universal, essential feature of human language that has a
              substantial impact on the semantics of a statement. Moreover, a rule-based approach
              is proposed in the data preprocessing step which handles modal-verb negation problem
              that shows significant improvement in translation performance in terms of automatic
              and manual evaluation scores.}
}
Conference Paper Image
Caption Generation for Low-Resource Assamese
Language
Conference on Computational Linguistics and Speech Processing
(ROCLING),
Taipei, Taiwan | November, 2022
@inproceedings{nath-etal-2022-image,
  author    = {Nath, Prachurya and Adhikary, Prottay Kumar and Dadure, Pankaj and
               Pakray, Partha and Manna, Riyanka and Bandyopadhyay, Sivaji},
  title     = {Image Caption Generation for Low-Resource {A}ssamese Language},
  booktitle = {Proceedings of the 34th Conference on Computational Linguistics and
               Speech Processing (ROCLING 2022)},
  year      = {2022},
  month     = nov,
  pages     = {263--272},
  publisher = {The Association for Computational Linguistics and Chinese Language
               Processing (ACLCLP)},
  address   = {Taipei, Taiwan},
  url       = {https://aclanthology.org/2022.rocling-1.33},
  abstract  = {Image captioning is a prominent Artificial Intelligence (AI) research
               area that deals with visual recognition and a linguistic description of the image.
               It is an interdisciplinary field concerning how computers can see and understand
               digital images{\&} videos, and describe them in a language known to humans.
               Constructing a meaningful sentence needs both structural and semantic information of
               the language. This paper highlights the contribution of image caption generation for
               the Assamese language. The unavailability of an image caption generation system for
               the Assamese language is an open problem for AI-NLP researchers, and it{'}s just an
               early stage of the research. To achieve our defined objective, we have used the
               encoder-decoder framework, which combines the Convolutional Neural Networks and the
               Recurrent Neural Networks. The experiment has been tested on Flickr30k and Coco
               Captions dataset, which have been originally present in the English language. We
               have translated these datasets into Assamese language using the state-of-the-art
               Machine Translation (MT) system for our designed work.}
}
Book
Chapter Ontology-based
healthcare hierarchy towards chatbot
4th International Conference, CICBA 2022, Silchar, India |
July, 2022
@inproceedings{10.1007/978-3-031-10766-5_26,
  author    = {Adhikary, Prottay Kumar and Manna, Riyanka and Laskar, Sahinur Rahman and
               Pakray, Partha},
  editor    = {Mukhopadhyay, Somnath and Sarkar, Sunita and Dutta, Paramartha and
               Mandal, Jyotsna Kumar and Roy, Sudipta},
  title     = {Ontology-Based Healthcare Hierarchy Towards Chatbot},
  booktitle = {Computational Intelligence in Communications and Business Analytics},
  year      = {2022},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {326--335},
  isbn      = {978-3-031-10766-5},
  doi       = {10.1007/978-3-031-10766-5_26},
  abstract  = {Ontology refers to relationship-based hierarchical descriptions of
               concepts within a particular domain. Ontology, in the field of medicine, describes
               the concepts of medical terminologies and the relation between them, thus, enabling
               the sharing of medical knowledge. This paper aims to develop an ontology-based
               healthcare hierarchy and point out the research scope towards the chatbot
               application. The research scope includes the integration of the ontology-based
               healthcare hierarchy in the chatbot application by the establishment of
               relationships among individuals and real-world entities.}
}
Book
Chapter An Empirical Analysis on Abstractive Text Summarization
4th International Conference, CICBA 2022, Silchar, India |
July, 2022
@inproceedings{10.1007/978-3-031-10766-5_22,
  author    = {{Tawmo} and Adhikary, Prottay Kumar and Dadure, Pankaj and Pakray, Partha},
  editor    = {Mukhopadhyay, Somnath and Sarkar, Sunita and Dutta, Paramartha and
               Mandal, Jyotsna Kumar and Roy, Sudipta},
  title     = {An Empirical Analysis on Abstractive Text Summarization},
  booktitle = {Computational Intelligence in Communications and Business Analytics},
  year      = {2022},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {280--287},
  isbn      = {978-3-031-10766-5},
  doi       = {10.1007/978-3-031-10766-5_22},
  abstract  = {With the massive growth of blogs, news stories, and reports, extracting
               useful information from such a large quantity of textual documents has become a
               difficult task. Automatic text summarization is an excellent approach for
               summarising these documents. Text summarization aims to condense large documents
               into concise summaries while preserving essential information and meaning. A variety
               of fascinating summarising models have been developed to achieve state-of-the-art
               performance in terms of fluency, human readability, and semantically meaningful
               summaries. In this paper, we have investigated the OpenNMT tool for task text
               summarization. The OpenNMT is the encoder-decoder-based neural machine translation
               model which has been fine-tuned for the task of abstractive text summarization. The
               proposed OpenNMT based text summarization approach has been tested on freely
               available dataset such as CNNDM {\&} MSMO dataset and depicts their proficiency in
               terms of ROUGE and BLEU score.}
}
Dataset Dzongkha Handwritten
Digit Dataset
IAPR-TC11: Association for Pattern Recognition Technical
Committee Number 11 | February, 2022
@dataset{tawmo_2022_6271560,
  title     = {Dzongkha Handwritten Digit Dataset},
  author    = {Tawmo and Adhikary, Prottay Kumar and Dadure, Pankaj and Pakray, Partha},
  publisher = {Zenodo},
  version   = {1.0.0},
  year      = 2022,
  month     = feb,
  doi       = {10.5281/zenodo.6271560},
  url       = {https://doi.org/10.5281/zenodo.6271560}
}
2021
Conference Paper Neural
Machine Translation for Tamil–Telugu Pair
Proceedings of the Sixth Conference on Machine Translation,
EMNLP, WMT (Online) | November, 2021
@inproceedings{laskar-etal-2021-neural,
  author    = {Laskar, Sahinur Rahman and Paul, Bishwaraj and Adhikary, Prottay Kumar and
               Pakray, Partha and Bandyopadhyay, Sivaji},
  title     = {Neural Machine Translation for {T}amil{--}{T}elugu Pair},
  booktitle = {Proceedings of the Sixth Conference on Machine Translation},
  year      = {2021},
  month     = nov,
  pages     = {284--287},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  url       = {https://aclanthology.org/2021.wmt-1.29},
  abstract  = {The neural machine translation approach has gained popularity in machine
               translation because of its context analysing ability and its handling of long-term
               dependency issues. We have participated in the WMT21 shared task of similar language
               translation on a Tamil-Telugu pair with the team name: CNLP-NITS. In this task, we
               utilized monolingual data via pre-train word embeddings in transformer model based
               neural machine translation to tackle the limitation of parallel corpus. Our model
               has achieved a bilingual evaluation understudy (BLEU) score of 4.05, rank-based
               intuitive bilingual evaluation score (RIBES) score of 24.80 and translation edit
               rate (TER) score of 97.24 for both Tamil-to-Telugu and Telugu-to-Tamil translations
               respectively.}
}