Spaces:
Sleeping
Sleeping
| { | |
| "metadata": { | |
| "Name": "Gemma 2", | |
| "Provider": "Google", | |
| "URL": "https://ai.google.dev/gemma/docs/model_card_2", | |
| "Type": "Large Language Model", | |
| "Modalities": [ | |
| "Text-to-Text" | |
| ] | |
| }, | |
| "scores": { | |
| "1. Bias, Stereotypes, and Representational Harms Evaluation": { | |
| "1.1 Bias Detection Overview": { | |
| "status": "Yes", | |
| "sources": [ | |
| { | |
| "type": "๐", | |
| "detail": "https://ai.google.dev/gemma/docs/model_card_2#data_preprocessing", | |
| "name": "Model Card - Data Preprocessing" | |
| }, | |
| { | |
| "type": "๐", | |
| "detail": "https://developers.googleblog.com/en/gemma-explained-new-in-gemma-2/", | |
| "name": "Developer Blog" | |
| }, | |
| { | |
| "type": "๐", | |
| "detail": "https://arxiv.org/html/2410.12864", | |
| "name": "Bias Analysis Paper" | |
| } | |
| ], | |
| "questions": { | |
| "Evaluations at various stages (data collection, preprocessing, AI system architecture, training, deployment)": true, | |
| "Have intrinsic properties of the AI system been evaluated for bias (e.g., embedding analysis)": true, | |
| "Have extrinsic bias evaluations been run (e.g., downstream task performance)": true, | |
| "Have evaluations been run across all applicable modalities": true, | |
| "Have bias evaluations been run that take the form of automatic quantitative evaluation": true, | |
| "Have bias evaluations been run with human participants?": true | |
| } | |
| }, | |
| "1.2 Protected Classes and Intersectional Measures": { | |
| "status": "Yes", | |
| "sources": [ | |
| { | |
| "type": "๐", | |
| "detail": "https://ai.google.dev/gemma/docs/model_card_2#evaluation_results", | |
| "name": "Model Card - Evaluation Results" | |
| } | |
| ], | |
| "questions": { | |
| "Do evaluations cover all applicable legal protected categories for in-scope uses of the system?": true, | |
| "Do evaluations cover additional subgroups that are likely to be harmed based on other personal characteristics": false, | |
| "Evaluation of how different aspects of identity interact and compound in AI system behavior": false, | |
| "Evaluation of AI system biases for legal protected categories and additional relevant subgroups": false | |
| } | |
| }, | |
| "1.3 Measurement of Stereotypes and Harmful Associations": { | |
| "status": "Yes", | |
| "sources": [ | |
| { | |
| "type": "๐", | |
| "detail": "https://arxiv.org/abs/2009.11462", | |
| "name": "Stereotype Analysis" | |
| } | |
| ], | |
| "questions": { | |
| "Measurement of known stereotypes in AI system outputs": true, | |
| "Measurement of other negative associations and assumptions regarding specific groups": true, | |
| "Measurement of stereotypes and negative associations across in-scope contexts": false | |
| } | |
| }, | |
| "1.4 Bias Evaluation Transparency and Documentation": { | |
| "status": "Yes", | |
| "sources": [ | |
| { | |
| "type": "๐", | |
| "detail": "https://arxiv.org/pdf/2403.13793", | |
| "name": "Evaluation Documentation" | |
| } | |
| ], | |
| "questions": { | |
| "Sufficient documentation of evaluation method to understand the scope of the findings": false, | |
| "Sufficient documentation of evaluation methods to replicate findings": true, | |
| "Sufficient documentation of evaluation results to support comparison": true, | |
| "Documentation of bias mitigation measures": false, | |
| "Documentation of bias monitoring approaches": false | |
| } | |
| } | |
| }, | |
| "2. Cultural Values and Sensitive Content Evaluation": { | |
| "2.1 Cultural Variation Overview": { | |
| "status": "Yes", | |
| "sources": [ | |
| { | |
| "type": "๐", | |
| "detail": "https://aclanthology.org/2024.findings-emnlp.942.pdf", | |
| "name": "Cultural Variation Analysis" | |
| } | |
| ], | |
| "questions": { | |
| "Evaluations at various stages": false, | |
| "Have intrinsic properties been evaluated for cultural variation": false, | |
| "Have extrinsic cultural variation evaluations been run": true, | |
| "Have evaluations been run across all applicable modalities": true, | |
| "Have cultural variation evaluations been run that take the form of automatic quantitative evaluation": true, | |
| "Have cultural variation evaluations been run with human participants?": false | |
| } | |
| }, | |
| "2.2 Cultural Diversity and Representation": { | |
| "status": "No", | |
| "sources": [], | |
| "questions": { | |
| "Use of evaluation methods developed in the cultural contexts in scope": false, | |
| "Respect of indigenous sovereignty, protected rights, and cultural norms": false, | |
| "Evaluation of cultural variation across geographic dimensions": false, | |
| "Evaluation of cultural variation representing communities' perspectives": false, | |
| "Analysis of how cultural context affects AI system performance": false | |
| } | |
| }, | |
| "2.3 Generated Sensitive Content across Cultural Contexts": { | |
| "status": "Yes", | |
| "sources": [ | |
| { | |
| "type": "๐", | |
| "detail": "https://arxiv.org/html/2408.00118v1#S6", | |
| "name": "Content Safety Analysis" | |
| } | |
| ], | |
| "questions": { | |
| "Has the AI system been evaluated for its likelihood of facilitating generation of threatening or violent content": true, | |
| "Has the AI system been evaluated for its likelihood of facilitating generation of targeted harassment or discrimination": false, | |
| "Has the AI system been evaluated for its likelihood of facilitating generation of hate speech": false, | |
| "Has the AI system been evaluated for content embedding values not reflective of user cultural context": false, | |
| "Has the AI system been evaluated for exposing users to inappropriate content": false, | |
| "Has the AI system been evaluated for content with negative psychological impacts": true, | |
| "Has the evaluation explicitly addressed cultural variation": false | |
| } | |
| }, | |
| "2.4 Cultural Variation Transparency and Documentation": { | |
| "status": "No", | |
| "sources": [], | |
| "questions": { | |
| "Documentation of cultural contexts considered during development": false, | |
| "Documentation of cultural contexts covered by evaluations": false, | |
| "Sufficient documentation of evaluation method": false, | |
| "Sufficient documentation of evaluation methods to replicate findings": false, | |
| "Sufficient documentation of evaluation results": false, | |
| "Documentation of psychological impact on evaluators": false, | |
| "Documentation of evaluator well-being measures": false | |
| } | |
| } | |
| }, | |
| "3. Disparate Performance": { | |
| "3.1 Disparate Performance Overview": { | |
| "status": "No", | |
| "sources": [], | |
| "questions": { | |
| "Have development choices been evaluated for disparate performance contribution": false, | |
| "Have extrinsic disparate performance evaluations been run": false, | |
| "Have evaluations been run across all applicable modalities": false, | |
| "Have disparate performance evaluations been run quantitatively": false, | |
| "Have disparate performance evaluations been run with human participants": false | |
| } | |
| }, | |
| "3.2 Identifying Target Groups": { | |
| "status": "No", | |
| "sources": [], | |
| "questions": { | |
| "Identification of mandated target groups": false, | |
| "Identification of additional potentially harmed groups": false, | |
| "Assessment of systemic barriers in data collection": false, | |
| "Consideration of historical disparities": false, | |
| "Identification of implicit and explicit markers": false | |
| } | |
| }, | |
| "3.3 Subgroup Performance Analysis": { | |
| "status": "No", | |
| "sources": [], | |
| "questions": { | |
| "Non-aggregated evaluation results across subpopulations": false, | |
| "Metrics for decision-making tasks": false, | |
| "Metrics for other tasks including generative": false, | |
| "Worst-case subgroup performance analysis": false, | |
| "Intersectional analysis": false, | |
| "Evaluation of implicit social group markers": false | |
| } | |
| }, | |
| "3.4 Transparency and Documentation": { | |
| "status": "No", | |
| "sources": [], | |
| "questions": { | |
| "Documentation of evaluation method scope": false, | |
| "Documentation of evaluation methods for replication": false, | |
| "Documentation of evaluation results for comparison": false, | |
| "Documentation of mitigation measures": false, | |
| "Documentation of monitoring approaches": false | |
| } | |
| } | |
| }, | |
| "4. Environmental Costs and Carbon Emissions Evaluation": { | |
| "4.1 Environmental Costs Overview": { | |
| "status": "N/A", | |
| "sources": [], | |
| "questions": { | |
| "Evaluations of different processes": false, | |
| "Evaluations across modalities": false, | |
| "Evaluations on standardized benchmarks": false, | |
| "Community feedback consideration": false, | |
| "Full supply chain consideration": false | |
| } | |
| }, | |
| "4.2 Development Impact": { | |
| "status": "N/A", | |
| "sources": [], | |
| "questions": { | |
| "FLOPS accounting": false, | |
| "Energy consumption evaluation": false, | |
| "Carbon impact evaluation": false, | |
| "Hardware lifecycle evaluation": false | |
| } | |
| }, | |
| "4.3 Deployment Impact": { | |
| "status": "Yes", | |
| "sources": [ | |
| { | |
| "type": "๐", | |
| "detail": "https://cloud.google.com/blog/products/ai-machine-learning/performance-deepdive-of-gemma-on-google-cloud", | |
| "name": "Performance Analysis" | |
| } | |
| ], | |
| "questions": { | |
| "Evaluation of inference FLOPS": true, | |
| "Evaluation of common deployment energy consumption": false, | |
| "Evaluation across deployment settings": false, | |
| "Evaluation of task-specific variations": false, | |
| "Evaluation of deployment carbon impact": false, | |
| "Evaluation of deployment hardware lifecycle": false | |
| } | |
| }, | |
| "4.4 Documentation": { | |
| "status": "N/A", | |
| "sources": [], | |
| "questions": { | |
| "Equipment and infrastructure documentation": false, | |
| "Evaluation methods documentation": false, | |
| "Results documentation": false, | |
| "Documentation for comparison": false | |
| } | |
| } | |
| }, | |
| "5. Privacy and Data Protection Evaluation": { | |
| "5.1 Overview": { | |
| "status": "Yes", | |
| "sources": [ | |
| { | |
| "type": "๐", | |
| "detail": "https://arxiv.org/pdf/2408.00118", | |
| "name": "Privacy Evaluation" | |
| } | |
| ], | |
| "questions": { | |
| "Evaluations at various stages": true, | |
| "Intrinsic privacy vulnerability evaluation": false, | |
| "Extrinsic privacy evaluations": true, | |
| "Evaluations across modalities": false, | |
| "Quantitative privacy evaluations": true, | |
| "Human participant privacy evaluations": false | |
| } | |
| }, | |
| "5.2 Privacy Harms": { | |
| "status": "Yes", | |
| "sources": [ | |
| { | |
| "type": "๐", | |
| "detail": "https://arxiv.org/pdf/2408.00118", | |
| "name": "Privacy Analysis" | |
| } | |
| ], | |
| "questions": { | |
| "Personal information revelation evaluation": true, | |
| "Content impersonation evaluation": true, | |
| "Personal information confabulation evaluation": true | |
| } | |
| }, | |
| "5.3 IP and Security": { | |
| "status": "Yes", | |
| "sources": [ | |
| { | |
| "type": "๐", | |
| "detail": "https://www.cio.com/article/3567106/latticeflow-launches-first-comprehensive-evaluation-framework-for-compliance-with-the-eu-ai-act.html", | |
| "name": "Security Evaluation" | |
| } | |
| ], | |
| "questions": { | |
| "Training data reproduction evaluation": true, | |
| "Information security risk evaluation": false | |
| } | |
| }, | |
| "5.4 Documentation": { | |
| "status": "Yes", | |
| "sources": [ | |
| { | |
| "type": "๐", | |
| "detail": "https://ai.google.dev/gemma/docs/model_card_2", | |
| "name": "Model Card Documentation" | |
| } | |
| ], | |
| "questions": { | |
| "Evaluation methods documentation": false, | |
| "Results documentation": false, | |
| "Limitations documentation": true, | |
| "Deployment considerations documentation": false, | |
| "Training data documentation": false | |
| } | |
| } | |
| }, | |
| "6. Financial Costs Evaluation": { | |
| "6.1 Overview": { | |
| "status": "N/A", | |
| "sources": [], | |
| "questions": { | |
| "Cost evaluation across stages": false, | |
| "Component cost evaluation": false, | |
| "Modality cost evaluation": false, | |
| "Direct and indirect expense evaluation": false, | |
| "Cost projection validation": false | |
| } | |
| }, | |
| "6.2 Development Costs": { | |
| "status": "N/A", | |
| "sources": [], | |
| "questions": { | |
| "R&D labor costs": false, | |
| "Data collection costs": false, | |
| "Infrastructure costs": false, | |
| "Training approach costs": false, | |
| "Architecture impact costs": false | |
| } | |
| }, | |
| "6.3 Operation Costs": { | |
| "status": "N/A", | |
| "sources": [], | |
| "questions": { | |
| "Inference costs": false, | |
| "Storage costs": false, | |
| "Scaling costs": false, | |
| "Deployment context costs": false, | |
| "Update costs": false | |
| } | |
| }, | |
| "6.4 Documentation": { | |
| "status": "N/A", | |
| "sources": [], | |
| "questions": { | |
| "Methodology documentation": false, | |
| "Cost breakdown documentation": false, | |
| "Usage scenario documentation": false, | |
| "Projection documentation": false | |
| } | |
| } | |
| }, | |
| "7. Data and Content Moderation Labor Evaluation": { | |
| "7.1 Overview": { | |
| "status": "No", | |
| "sources": [], | |
| "questions": { | |
| "Labor practice evaluation": false, | |
| "Worker category evaluation": false, | |
| "Task type evaluation": false, | |
| "Industry standard evaluation": false, | |
| "Worker type evaluation": false, | |
| "Regional context evaluation": false | |
| } | |
| }, | |
| "7.2 Working Conditions": { | |
| "status": "No", | |
| "sources": [], | |
| "questions": { | |
| "Compensation assessment": false, | |
| "Job security assessment": false, | |
| "Workplace safety evaluation": false, | |
| "Worker autonomy assessment": false, | |
| "Power dynamics evaluation": false | |
| } | |
| }, | |
| "7.3 Worker Wellbeing": { | |
| "status": "No", | |
| "sources": [], | |
| "questions": { | |
| "Support system assessment": false, | |
| "Content preparation evaluation": false, | |
| "Cultural support evaluation": false | |
| } | |
| }, | |
| "7.4 Documentation": { | |
| "status": "No", | |
| "sources": [], | |
| "questions": { | |
| "Methodology documentation": false, | |
| "Demographics documentation": false, | |
| "Support system documentation": false, | |
| "Incident reporting documentation": false | |
| } | |
| } | |
| } | |
| } | |
| } | |