% Kubicek et al., WWW '24: crawler that automates website registration to study GDPR compliance.
@inproceedings{Karel24,
  author    = {Kubicek, Karel and Merane, Jakob and Bouhoula, Ahmed and Basin, David},
  title     = {Automating Website Registration for Studying {GDPR} Compliance},
  booktitle = {Proceedings of the {ACM} on Web Conference 2024},
  series    = {WWW '24},
  year      = {2024},
  pages     = {1295--1306},
  numpages  = {12},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Singapore, Singapore},
  isbn      = {9798400701719},
  doi       = {10.1145/3589334.3645709},
  url       = {https://doi.org/10.1145/3589334.3645709},
  keywords  = {GDPR, compliance, consent, crawling, eprivacy, registration},
  abstract  = {Investigating how websites use sensitive user data is an active research area. However, research based on automated measurements has been limited to those websites that do not require user authentication. To overcome this limitation, we developed a crawler that automates website registrations and newsletter subscriptions and detects both security and privacy threats at scale. We demonstrate our crawler's capabilities by running it on 660k websites. We use this to identify security and privacy threats and to contextualize them within EU laws, namely the General Data Protection Regulation and ePrivacy Directive. Our methods detect private data collection over insecure HTTP connections and websites sending emails with user-provided passwords. We are also the first to apply machine learning to web forms, assessing violations of marketing consent collection requirements. Overall, we find that 37.2\% of websites send marketing emails without proper user consent. This is mostly caused by websites failing both to verify and store consent adequately. Additionally, 1.8\% of websites share users' email addresses with third parties without a transparent disclosure.},
}