@article{3022,
  author   = {Zhang, Chiyu and Abdul-Mageed, Muhammad},
  title    = {A Pre-trained {BERT} Model for {Arabic} Author Profiling},
  journal  = {Journal of E-Technology},
  year     = {2020},
  volume   = {11},
  number   = {2},
  pages    = {54--59},
  doi      = {10.6025/jet/2020/11/2/54-59},
  url      = {http://www.dline.info/jet/fulltext/v11n2/jetv11n2_2.pdf},
  abstract = {We report our models for detecting age, language variety, and gender from social media data in the context of the Arabic author profiling and deception detection shared task (APDA) [32]. We build simple models based on pre-trained bidirectional encoders from transformers (BERT). We first fine-tune the pre-trained BERT model on each of the three datasets with shared task released data. Then we augment shared task data with in-house data for gender and dialect, showing the utility of augmenting training data. Our best models on the shared task test data are acquired with a majority voting of various BERT models trained under different data conditions. We acquire 54.72% accuracy for age, 93.75% for dialect, 81.67% for gender, and 40.97% joint accuracy across the three tasks.},
}