% Journal article (Journal of Soft Computing and Information Technology, vol. 9, no. 3, 2020).
% The *_fa fields carry the Persian title, abstract and keywords; they are not standard
% BibTeX fields, so standard styles ignore them.
@article{mohammadi2020ensemble,
  author      = {Mohammadi, Shahriar and Mousavi, MirReza},
  title       = {Investigating the Impact of Ensemble Machine Learning Methods on Spam Review Detection Based on Behavioral Features},
  journal     = {Journal of Soft Computing and Information Technology},
  volume      = {9},
  number      = {3},
  pages       = {132--147},
  year        = {2020},
  publisher   = {Babol Noshirvani University of Technology},
  issn        = {2383-1006},
  eissn       = {2588-4913},
  abstract    = {One of the most influential links on the Internet is the feedback provided by consumers as an experience of using the product to the people who want to buy that product. Beneficiaries use this opportunity to transfer inaccurate experience in order to promote or demote the value of a particular service or product unjustly, and this is the cause of placing their reviews between spam reviews category. Therefore, identifying these reviews using machine learning techniques and ensemble learners has become a hot topic among researchers. The purpose of this study is to investigate the impact of using ensemble machine learning methods on identifying such reviews using behavioral features. Recent studies have shown that the ensemble methods used in this study in combination with text-based features in addition to imposing more computational expense are not able to improve the performance of the best base learners. In this study, in addition to identifying the best base and ensemble learners in using behavioral features, we seek to determine whether these features combination with ensemble learners can achieve greater accuracy or a significant change in model performance. For this purpose, seven base learners and four ensemble learners such as Bagging, Boosting, Random Forest and Extra Tree were used and the results were compared with the results of using text-based features. Our evaluations show that using the decision tree as a base learner, along with the method of boosting in unbalanced data set and bagging in balanced dataset, yields better results and we can achieve more tangible change in the performance of the best base algorithms by ensemble learners in using behavioral features over text-based.},
  keywords    = {Spam Reviews,Machine Learning,Ensemble Methods,Behavioral Features},
  title_fa    = {بررسی تأثیر استفاده از روش‌های یادگیری ماشین تجمعی در شناسایی نظر‌‌های هرز بر اساس ویژگی‌های رفتاری},
  abstract_fa = {یکی از تأثیرگذارترین ارتباط‌ها در اینترنت، نظر‌‌هایی است که توسط افراد مصرف‌کننده یک محصول به‌عنوان تجربه استفاده، در اختیار افراد خواهان خرید محصول قرار می‌گیرد. استفاده سودجویان از این فرصت انتقال تجربه، به‌منظور ارتقا یا تنزل ارزش یک خدمت یا محصول خاص به‌ناحق، باعث قرارگیری نظر‌‌های آن‌ها در دسته نظر‌‌های هرز می‌شود. ازاین‌رو شناسایی این نظرها با استفاده از روش‌های یادگیری ماشین و یادگیرنده‌های تجمعی به مبحثی داغ در میان محققان تبدیل شده است. هدف این مطالعه بررسی تأثیر استفاده از روش‌های یادگیری ماشین تجمعی در شناسایی اینگونه نظرها با استفاده از ویژگی‌های رفتاری است. بررسی‌های اخیر نشان داده است که روش‌های تجمعی مورد استفاده در این مطالعه در ادغام با ویژگی‌های متنی علاوه بر تحمیل بار محاسباتی بیشتر قادر به ارتقای عملکرد بهترین الگوریتم‌‌های پایه نیستند. در این مطالعه علاوه‌بر شناسایی بهترین یادگیرنده‌های پایه و تجمعی در استفاده از ویژگی‌های رفتاری به‌دنبال آن هستیم که آیا می‌توان با استفاده از این ویژگی‌ها و یادگیرنده‌های تجمعی به دقتی بیشتر و یا تغییر محسوسی در عملکرد مدل دست یابیم. بدین منظور از هفت یادگیرنده پایه و چهار یادگیرنده تجمعی دسته‌بندی، تقویت‌سازی، جنگل تصادفی و درخت اضافی استفاده شد و نتایج حاصل با نتایج استفاده از ویژگی‌های متنی مورد مقایسه قرار گرفت. ارزیابی‌ها نشان‌دهنده عملکرد بهتر یادگیرنده پایه درخت تصمیم به‌همراه روش تجمعی تقویت‌سازی در حالت استفاده از مجموعه‌داده نامتوازن و روش تجمعی دسته‌بندی در استفاده از مجموعه‌داده متوازن و هم‌چنین تغییر محسوس‌تر عملکرد بهترین الگوریتم‌ پایه، توسط یادگیرنده‌های تجمعی، در استفاده از ویژگی‌های رفتاری نسبت به متنی است.},
  keywords_fa = {نظر‌‌های هرز,یادگیری ماشین,روش‌های تجمعی,ویژگی‌های رفتاری},
  url         = {https://jscit.nit.ac.ir/article_107647.html},
  eprint      = {https://jscit.nit.ac.ir/article_107647_bc401cb4aff47a5cda40f4f9631a3331.pdf}
}