2024
Journal Article Exploring
the Efficacy of Large Language Models in Summarizing Mental Health Counseling
Sessions: Benchmark Study
JMIR Ment Health | February, 2024
@article{adhikary2024exploring,
  author    = {Adhikary, Prottay Kumar and
               Srivastava, Aseem and
               Kumar, Shivani and
               Singh, Salam Michael and
               Manuja, Puneet and
               Gopinath, Jini K and
               Krishnan, Vijay and
               Gupta, Swati Kedia and
               Deb, Koushik Sinha and
               Chakraborty, Tanmoy},
  title     = {Exploring the Efficacy of Large Language Models in Summarizing Mental Health
               Counseling Sessions: Benchmark Study},
  journal   = {JMIR Mental Health},
  volume    = {11},
  pages     = {e57306},
  year      = {2024},
  publisher = {JMIR Publications},
  address   = {Toronto, Canada}
}
2023
Conference Paper TRAVID: An
End-to-End Video Translation Framework
IJCNLP-AACL 2023, Bali, Indonesia | November, 2023
@inproceedings{adhikary-EtAl:2023:ijcnlp,
  author    = {Adhikary, Prottay Kumar and
               Sugandhi, Bandaru and
               Ghimire, Subhojit and
               Pal, Santanu and
               Pakray, Partha},
  title     = {{TRAVID}: An End-to-End Video Translation Framework},
  booktitle = {Proceedings of the 13th International Joint Conference on Natural Language
               Processing and the 3rd Conference of the {Asia-Pacific} Chapter of the
               Association for Computational Linguistics: System Demonstrations},
  month     = nov,
  year      = {2023},
  address   = {Bali, Indonesia},
  publisher = {Asian Federation of Natural Language Processing},
  pages     = {1--9}
}
Conference Paper CNLP-NITS
at SemEval-2023 Task 10: Online sexism prediction, PREDHATE!
The 17th International Workshop on Semantic Evaluation
(SemEval-2023) | July, 2023
@inproceedings{vetagiri-etal-2023-cnlp,
  author    = {Vetagiri, Advaitha and Adhikary, Prottay and Pakray, Partha and Das, Amitava},
  title     = {{CNLP}-{NITS} at {SemEval}-2023 Task 10: Online sexism prediction, {PREDHATE}!},
  booktitle = {Proceedings of the 17th International Workshop on Semantic Evaluation
               (SemEval-2023)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {815--822},
  doi       = {10.18653/v1/2023.semeval-1.113},
  url       = {https://aclanthology.org/2023.semeval-1.113},
  abstract  = {Online sexism is a rising issue that threatens women{'}s safety, fosters
hostile situations, and upholds social inequities. We describe a task SemEval-2023
Task 10 for creating English-language models that can precisely identify and
categorize sexist content on internet forums and social platforms like Gab and
Reddit as well to provide an explainability in order to address this problem. The
problem is divided into three hierarchically organized subtasks: binary sexism
detection, sexism by category, and sexism by fine-grained vector. The dataset
consists of 20,000 labelled entries. For Task A, pertained models like Convolutional
Neural Network (CNN) and Bidirectional Long Short-Term Memory (BiLSTM), which is
called CNN-BiLSTM and Generative Pretrained Transformer 2 (GPT-2) models were used,
as well as the GPT-2 model for Task B and C, and have provided experimental
configurations. According to our findings, the GPT-2 model performs better than the
CNN-BiLSTM model for Task A, while GPT-2 is highly accurate for Tasks B and C on the
training, validation and testing splits of the training data provided in the task.
Our proposed models allow researchers to create more precise and understandable
models for identifying and categorizing sexist content in online forums, thereby
empowering users and moderators.}
}
Conference Paper Leveraging
GPT-2
for automated classification of online sexist content
Working Notes of the Conference and Labs of the Evaluation
Forum (CLEF 2023) | May, 2023
@inproceedings{vetagiri2023leveraging,
  author    = {Vetagiri, Advaitha and Adhikary, Prottay Kumar and Pakray, Partha and Das, Amitava},
  title     = {Leveraging {GPT-2} for automated classification of online sexist content},
  booktitle = {Working Notes of the Conference and Labs of the Evaluation Forum ({CLEF} 2023)},
  year      = {2023}
}
Journal Dzongkha Handwritten
Digit Recognition using Machine Learning Techniques
Procedia Computer Science | January, 2023
@article{ADHIKARY20232350,
  author   = {Adhikary, Prottay Kumar and
              Dadure, Pankaj and
              Saha, Pradipta and
              Tawmo and
              Pakray, Partha},
  title    = {{Dzongkha} Handwritten Digit Recognition using Machine Learning Techniques},
  journal  = {Procedia Computer Science},
  volume   = {218},
  pages    = {2350--2358},
  year     = {2023},
  note     = {International Conference on Machine Learning and Data Engineering},
  issn     = {1877-0509},
  doi      = {10.1016/j.procs.2023.01.210},
  url      = {https://www.sciencedirect.com/science/article/pii/S1877050923002107},
  keywords = {Dzongkha Language, Character Recognition, Digit Recognition, Handwritten
Characters, Machine Learning},
  abstract = {Handwritten digit recognition has recently gained importance, attracting
many researchers due to its use in various machine learning and computer vision
applications. As technology and science progressing, there is a need for a system to
recognize the handwritten script in several real-time applications to reduce human
effort. There a lot of work has been done on the recognition and generation of
handwritten digits of high-resource languages such as English. However, insufficient
work has been done on Dzongkha digits recognition, as Dzongkha digits are
low-resource and more complex than English patterns. This paper aims to perform
handwritten character recognition of Dzongkha digit using several machine learning
techniques. The unavailability of the Dzongkha handwritten digit dataset is the
prime motivation behind this work. To facilitates the recognition of Dzongkha
handwritten digit, we have collected the data of Dzongkha handwritten digit from
indigenous and non-indigenous people of Bhutan and provided the dataset for further
research. Moreover, we have used several machine algorithms, including a support
vector machine, K-nearest neighbor, and decision tree. Among these algorithms, the
support vector machine classification algorithm has achieved a remarkable result
with an accuracy of 98.29\%.}
}
2022
Journal Investigation of
negation effect for En-As machine translation
Sādhanā | November, 2022
@article{Laskar2022,
  author   = {Laskar, Sahinur Rahman and
              Gogoi, Abinash and
              Dutta, Samudranil and
              Adhikary, Prottay Kumar and
              Nath, Prachurya and
              Pakray, Partha and
              Bandyopadhyay, Sivaji},
  title    = {Investigation of negation effect for {English--Assamese} machine translation},
  journal  = {S{\={a}}dhan{\={a}}},
  year     = {2022},
  month    = nov,
  day      = {14},
  volume   = {47},
  number   = {4},
  pages    = {238},
  abstract = {Computational linguistics deals with the computational modelling of
natural languages, in which machine translation is a popular task. The aim of
machine translation is to automatically translate one natural language into another,
which minimizes the linguistic barrier of different linguistic backgrounds. The
data-driven approach of machine translation, namely, neural machine translation
achieves state-of-the-art results on different language pairs, however it needs a
sufficient amount of parallel training data to attain reasonable translation
performance. In this work, we have explored different machine translation models on
a low-resource English--Assamese language pair and investigated different sources of
errors, particularly due to negation in English-to-Assamese and Assamese-to-English
translation. Negation is a universal, essential feature of human language that has a
substantial impact on the semantics of a statement. Moreover, a rule-based approach
is proposed in the data preprocessing step which handles modal-verb negation problem
that shows significant improvement in translation performance in terms of automatic
and manual evaluation scores.},
  issn     = {0973-7677},
  doi      = {10.1007/s12046-022-01965-5},
  url      = {https://doi.org/10.1007/s12046-022-01965-5}
}
Conference Paper Image
Caption Generation for Low-Resource Assamese
Language
Conference on Computational Linguistics and Speech Processing
(ROCLING),
Taipei, Taiwan | November, 2022
@inproceedings{nath-etal-2022-image,
  author    = {Nath, Prachurya and Adhikary, Prottay Kumar and Dadure, Pankaj and
               Pakray, Partha and Manna, Riyanka and Bandyopadhyay, Sivaji},
  title     = {Image Caption Generation for Low-Resource {Assamese} Language},
  booktitle = {Proceedings of the 34th Conference on Computational Linguistics and
               Speech Processing (ROCLING 2022)},
  month     = nov,
  year      = {2022},
  address   = {Taipei, Taiwan},
  publisher = {The Association for Computational Linguistics and Chinese Language
               Processing (ACLCLP)},
  pages     = {263--272},
  url       = {https://aclanthology.org/2022.rocling-1.33},
  abstract  = {Image captioning is a prominent Artificial Intelligence (AI) research
area that deals with visual recognition and a linguistic description of the image.
It is an interdisciplinary field concerning how computers can see and understand
digital images{\&} videos, and describe them in a language known to humans.
Constructing a meaningful sentence needs both structural and semantic information of
the language. This paper highlights the contribution of image caption generation for
the Assamese language. The unavailability of an image caption generation system for
the Assamese language is an open problem for AI-NLP researchers, and it{'}s just an
early stage of the research. To achieve our defined objective, we have used the
encoder-decoder framework, which combines the Convolutional Neural Networks and the
Recurrent Neural Networks. The experiment has been tested on Flickr30k and Coco
Captions dataset, which have been originally present in the English language. We
have translated these datasets into Assamese language using the state-of-the-art
Machine Translation (MT) system for our designed work.}
}
Book
Chapter Ontology-based
healthcare hierarchy towards chatbot
4th International Conference, CICBA 2022, Silchar, India |
July, 2022
@inproceedings{10.1007/978-3-031-10766-5_26,
  author    = {Adhikary, Prottay Kumar and
               Manna, Riyanka and
               Laskar, Sahinur Rahman and
               Pakray, Partha},
  editor    = {Mukhopadhyay, Somnath and
               Sarkar, Sunita and
               Dutta, Paramartha and
               Mandal, Jyotsna Kumar and
               Roy, Sudipta},
  title     = {Ontology-Based Healthcare Hierarchy Towards Chatbot},
  booktitle = {Computational Intelligence in Communications and Business Analytics},
  year      = {2022},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {326--335},
  abstract  = {Ontology refers to relationship-based hierarchical descriptions of
concepts within a particular domain. Ontology, in the field of medicine, describes
the concepts of medical terminologies and the relation between them, thus, enabling
the sharing of medical knowledge. This paper aims to develop an ontology-based
healthcare hierarchy and point out the research scope towards the chatbot
application. The research scope includes the integration of the ontology-based
healthcare hierarchy in the chatbot application by the establishment of
relationships among individuals and real-world entities.},
  isbn      = {978-3-031-10766-5},
  doi       = {10.1007/978-3-031-10766-5_26}
}
Book
Chapter An Empirical Analysis on Abstractive Text Summarization
4th International Conference, CICBA 2022, Silchar, India |
July, 2022
@inproceedings{10.1007/978-3-031-10766-5_22,
  author    = {Tawmo and
               Adhikary, Prottay Kumar and
               Dadure, Pankaj and
               Pakray, Partha},
  editor    = {Mukhopadhyay, Somnath and
               Sarkar, Sunita and
               Dutta, Paramartha and
               Mandal, Jyotsna Kumar and
               Roy, Sudipta},
  title     = {An Empirical Analysis on Abstractive Text Summarization},
  booktitle = {Computational Intelligence in Communications and Business Analytics},
  year      = {2022},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {280--287},
  abstract  = {With the massive growth of blogs, news stories, and reports, extracting
useful information from such a large quantity of textual documents has become a
difficult task. Automatic text summarization is an excellent approach for
summarising these documents. Text summarization aims to condense large documents
into concise summaries while preserving essential information and meaning. A variety
of fascinating summarising models have been developed to achieve state-of-the-art
performance in terms of fluency, human readability, and semantically meaningful
summaries. In this paper, we have investigated the OpenNMT tool for task text
summarization. The OpenNMT is the encoder-decoder-based neural machine translation
model which has been fine-tuned for the task of abstractive text summarization. The
proposed OpenNMT based text summarization approach has been tested on freely
available dataset such as CNNDM {\&} MSMO dataset and depicts their proficiency in
terms of ROUGE and BLEU score.},
  isbn      = {978-3-031-10766-5},
  doi       = {10.1007/978-3-031-10766-5_22}
}
Dataset Dzongkha Handwritten
Digit Dataset
IAPR-TC11: Association for Pattern Recognition Technical
Committee Number 11 | February, 2022
@dataset{tawmo_2022_6271560,
  author    = {Tawmo and
               Adhikary, Prottay Kumar and
               Dadure, Pankaj and
               Pakray, Partha},
  title     = {{Dzongkha} Handwritten Digit Dataset},
  month     = feb,
  year      = {2022},
  publisher = {Zenodo},
  version   = {1.0.0},
  doi       = {10.5281/zenodo.6271560},
  url       = {https://doi.org/10.5281/zenodo.6271560}
}
2021
Conference Paper Neural
Machine Translation for Tamil–Telugu Pair
Proceedings of the Sixth Conference on Machine Translation,
EMNLP, WMT (Online) | November, 2021
@inproceedings{laskar-etal-2021-neural,
  author    = {Laskar, Sahinur Rahman and Paul, Bishwaraj and Adhikary, Prottay Kumar and
               Pakray, Partha and Bandyopadhyay, Sivaji},
  title     = {Neural Machine Translation for {Tamil}--{Telugu} Pair},
  booktitle = {Proceedings of the Sixth Conference on Machine Translation},
  month     = nov,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {284--287},
  url       = {https://aclanthology.org/2021.wmt-1.29},
  abstract  = {The neural machine translation approach has gained popularity in machine
translation because of its context analysing ability and its handling of long-term
dependency issues. We have participated in the WMT21 shared task of similar language
translation on a Tamil-Telugu pair with the team name: CNLP-NITS. In this task, we
utilized monolingual data via pre-train word embeddings in transformer model based
neural machine translation to tackle the limitation of parallel corpus. Our model
has achieved a bilingual evaluation understudy (BLEU) score of 4.05, rank-based
intuitive bilingual evaluation score (RIBES) score of 24.80 and translation edit
rate (TER) score of 97.24 for both Tamil-to-Telugu and Telugu-to-Tamil translations
respectively.}
}
Research
LCS2, IIT Delhi
Research Assistant (Onsite)
| November 2023 - December 2023
- Project: AI for Mental Health
- Played a role in developing LLMs to enhance mental health counseling and address key issues within the field. Major contributions include identifying dialogue acts and summarizing mental health counseling conversations, with further work extending to aspect-based summarization of counseling components.
- Skills: Large Language Models, Dialogue Summarization, Speaker Profiling.
CSTAR, IIIT Hyderabad
Machine Learning Intern (Onsite)
| August 2023 - December 2023
- Project: Engaged in pioneering research on Quantization for Large Language Models as an Intern at the Center for Security, Theory and Algorithmic Research
- Reviewed literature related to Large Language Model quantization, with a particular focus on reparameterization-based Parameter-Efficient Fine-Tuning (PEFT) techniques. Implemented these techniques with real-world large language models (LoRA, QLoRA, AdaLoRA) and conducted research to discover new methods for enhancing compression efficiency.
- Skills: Large Language Models, Transformers, PEFT
National Institute of
Technology
Silchar
Summer Research Intern (Onsite)
| June 2022 - July 2022
- Project: Translated English video into Hindi, Bengali, Telugu, and Nepali with lip synchronization.
- Ensured accurate lip movements for seamless viewing across languages. Aims to bridge linguistic barriers and enhance content accessibility for diverse audiences.
- Skills: Speech & Video Processing, Transfer Learning, Machine Translation
National Institute of
Technology
Silchar
Winter Research Intern (Onsite)
| December 2021 - February 2022
- Project: Explored machine translation for the low-resource English–Assamese language pair.
- Examined impact of negation in both English-to-Assamese and Assamese-to-English translation & investigated various sources of errors related to negation. Proposed rule-based approach in data preprocessing to handle modal-verb negation problem & achieved significant improvements in translation through scoring methods.
- Skills: NLTK, Keras, Data Analytics, Scikit-Learn, TensorFlow, Python
Indian Institute of
Technology, Guwahati
Summer Research Intern (Onsite)
| July 2021 - September 2021
- Project: Hate speech detection in code-mixed Assamese and Bengali YouTube comments
- Collected a dataset of code-mixed AS-BN comments & applied data preprocessing techniques
- Developed robust hate speech detection model using ML and NLP
- Skills: NLTK, Keras, Scikit-Learn, TensorFlow, Python
Part-Time
Greenline Books
Web Developer (Remote)
| August 2023 - Resumed
- Position: Website Designing, Development & Database Maintenance
- Created user-centric website aligning with brand identity, using Wix expertise to integrate multimedia and optimize engagement. Collaborating across teams, transformed concepts into captivating, responsive sites, staying updated on trends to deliver cutting-edge solutions that enhance online presence and user experiences
- Skills: HTML5/CSS/JS, Wix Editor, MySQL
Level Innovations Pvt Ltd.
NLP Tutor (Remote)
| September 2022 - January 2023
- Position: Academic tutor and research assistant at Levelapp in deep learning field.
- Provided guidance and comprehensive tutoring to enhance students’ understanding. Contributed to research projects by conducting in-depth analyses and assisting in data collection and experimentation
- Skills: Advanced NLP, Deep Learning, PyTorch, BERT (Language Model)
Leadership
Gymkhana Union Body
International Student
Representative | March 2022 – June 2023
- Advocated for the interests and concerns of international students as an International Student Representative at NIT Silchar, acting as a liaison between international students and the larger student community.
- Organized events and activities to promote cultural exchange and integration between international and domestic students, fostering a supportive and inclusive environment.
- Addressed and resolved various challenges faced by international students, collaborating with the university administration and faculty to ensure their overall well-being and academic success at NIT Silchar.
Notre Dame English Club
Vice President of Archive & Documentation
| January 2018 - January 2023
- Played a pivotal role in preserving and showcasing the club's historical records and literary works, enhancing its online presence.
- Demonstrated proficiency in managing digital content and fostering a creative and literary environment.
- Skills: Web Development, Web Content Writing, Digital Archiving
Volunteer
- Designed Website for MIP Lab, NITS in 2023
- Web Developer at Indic MT Task, EMNLP 2023
- Program Committee Member and Reviewer at AIC 2023
- Program Committee Member and Reviewer at PCCDA 2023
- Program Committee Member and Reviewer at AIC 2022
- Program Committee Member and Reviewer at ICCIS 2022
- Task Coordinator and Web Developer at TextSumEval 2022
https://github.com/proadhikary/floodd
Floodd: Unraveling India's Flood Odds
Python, Streamlit,
Folium
May 2024
Floodd provides a comprehensive
analysis of flood events, utilizing a range of visualizations to explore patterns,
trends, and impacts based on geographical data, temporal factors, and causal
information. It is designed to aid researchers, policymakers, and the public in
understanding the dynamics of floods and facilitating data-driven decision-making in
disaster management and mitigation strategies.
https://github.com/proadhikary/MenstBot
MenstBot: A Menstrual Health Chatbot
Python,
Langchain, Chainlit, FAISS
March 2024
MenstBot is a menstrual health
chatbot based on Llama2, designed to provide information and support regarding
menstruation-related queries.
https://github.com/proadhikary/TRAVID
TRAVID: Translate Videos Online
Python, Flask,
Firebase
March 2023
TRAVID is a small-scale
Face-to-Face Video Translator project that lets users translate a short-duration video
in one
language to a selected language while maintaining the lip synchronisation, as well as an
attempt at voice-cloning.
https://comp-mt.streamlit.app/
COMP-MT: All-in-one Machine Translation Evaluation System
Python, Streamlit
January 2023
COMP-MT is an all-in-one MT
Evaluation System designed with Streamlit, enabling users to efficiently assess their
test
files against gold files for comprehensive machine translation evaluation.
https://github.com/proadhikary/predhate
PredHate: Detect Sexism Online
Python, Flask
October 2022
A Flask web app that detects
sexism online and provides features such as checking the history and classifying
multiple texts at a time.
https://github.com/CNLP-Summarization/Saransa
Saransa: A Summarization & Knowledge Graph Generation Tool
Python, Flask, SQL
April 2022
Saransa is a Flask-based project
utilizing a pretrained BERT model to offer a powerful Summarization & Knowledge
Graph Generation Tool.
Botica: Ontology Based Healthcare ChatBot
RASA, Python
June 2021
Botica is a RASA-based
Ontology-Based Healthcare ChatBot, offering a first aid help system to provide immediate
medical assistance and guidance.
https://github.com/proadhikary/Beeth
Beeth: A Music Player for the Hearing Impaired
C Language
September 2020
A music player catering to the
hearing impaired, offering enhanced accessibility features for a tailored musical
experience for 'Feature Phones'.
https://cse23.xyz/
⭐ CSE’23: A Web-based Distance Learning Program
HTML, CSS, JS
March 2020
CSE’23 provides information and
resources for the students of Computer Science and Engineering. The website contains
study materials, previous exam papers, assignments and much more. It is a useful platform
for those who want to learn
more about computer science and engineering.