@article{Zia_Castro_Zubiaga_Tyson_2022,
  author       = {Zia, Haris Bin and Castro, Ignacio and Zubiaga, Arkaitz and Tyson, Gareth},
  title        = {Improving Zero-Shot Cross-Lingual Hate Speech Detection with Pseudo-Label Fine-Tuning of Transformer Language Models},
  journal      = {Proceedings of the International AAAI Conference on Web and Social Media},
  volume       = {16},
  number       = {1},
  pages        = {1435--1439},
  year         = {2022},
  month        = {May},
  url          = {https://ojs.aaai.org/index.php/ICWSM/article/view/19402},
  doi          = {10.1609/icwsm.v16i1.19402},
  abstractNote = {Hate speech has proliferated on social media platforms in recent years. While this has been the focus of many studies, most works have exclusively focused on a single language, generally English. Low-resourced languages have been neglected due to the dearth of labeled resources. These languages, however, represent an important portion of the data due to the multilingual nature of social media. This work presents a novel zero-shot, cross-lingual transfer learning pipeline based on pseudo-label fine-tuning of Transformer Language Models for automatic hate speech detection. We employ our pipeline on benchmark datasets covering English (source) and 6 different non-English (target) languages written in 3 different scripts. Our pipeline achieves an average improvement of 7.6% (in terms of macro-F1) over previous zero-shot, cross-lingual models. This demonstrates the feasibility of high accuracy automatic hate speech detection for low-resource languages. We release our code and models at https://github.com/harisbinzia/ZeroshotCrosslingualHateSpeech.}
}