chore(audit): remove non-JSON files from PR

2025-12-27 09:26:25 +01:00 · 2025-12-26 18:16:26 +01:00
parent 7f2c7ccee5
commit 11f42e4e81
4 changed files with 0 additions and 2198 deletions
--- a/frontend/public/json/audit_category_report.md
+++ b/frontend/public/json/audit_category_report.md
@@ -1,10 +0,0 @@
-# Audit Report: JSON Categories
-
- Total files scanned: 432
- Files with parse errors: 0
- Files with questionable/missing categories: 1
-
-## Problematic files
-
- **versions.json**
-  - Note: no_category_field
--- a/frontend/public/json/semantic_audit_report.md
+++ b/frontend/public/json/semantic_audit_report.md
--- a/scripts/audit_json_categories.py
+++ b/scripts/audit_json_categories.py
@@ -1,160 +0,0 @@
-#!/usr/bin/env python3
-import json
-from pathlib import Path
-
-ROOT = Path(__file__).resolve().parents[1]
-JSON_DIR = ROOT / 'frontend' / 'public' / 'json'
-METADATA_FILE = JSON_DIR / 'metadata.json'
-REPORT_MD = JSON_DIR / 'audit_category_report.md'
-REPORT_JSON = JSON_DIR / 'audit_category_report.json'
-
-
-def load_metadata():
-    with METADATA_FILE.open() as f:
-        md = json.load(f)
-    cats = {}
-    for c in md.get('categories', []):
-        try:
-            cid = int(c.get('id'))
-        except Exception:
-            continue
-        cats[cid] = c
-
-    # Also create name->id map (lowercased)
-    name_map = {c.get('name','').lower(): int(c.get('id')) for c in md.get('categories', []) if 'name' in c and 'id' in c}
-    return cats, name_map
-
-
-def normalize_value(v):
-    if v is None:
-        return None
-    if isinstance(v, (int, float)):
-        return int(v)
-    if isinstance(v, str):
-        s = v.strip()
-        if s.isdigit():
-            return int(s)
-        return s.lower()
-    return v
-
-
-def check_file(p, cats_by_id, name_map):
-    try:
-        j = json.loads(p.read_text())
-    except Exception as e:
-        return {'file': str(p.name), 'error': f'json_parse_error: {e}'}
-
-    found = []
-    notes = []
-
-    # look for common keys
-    keys_to_check = ['category_id', 'category', 'categories']
-    for key in keys_to_check:
-        if key in j:
-            val = j[key]
-            if isinstance(val, list):
-                for item in val:
-                    nv = normalize_value(item)
-                    found.append((key, nv))
-            else:
-                nv = normalize_value(val)
-                found.append((key, nv))
-
-    # also check top-level keys that might indicate category
-    if not found:
-        for alt in ['tags', 'type']:
-            if alt in j:
-                val = j[alt]
-                if isinstance(val, list):
-                    for item in val:
-                        found.append((alt, normalize_value(item)))
-                else:
-                    found.append((alt, normalize_value(val)))
-
-    if not found:
-        notes.append('no_category_field')
-        return {'file': str(p.name), 'found': [], 'notes': notes}
-
-    mapped = []
-    for key, val in found:
-        if isinstance(val, int):
-            if val in cats_by_id:
-                mapped.append({'key': key, 'value': val, 'mapped_to': cats_by_id[val]['name']})
-            else:
-                mapped.append({'key': key, 'value': val, 'mapped_to': None})
-                notes.append(f'unknown_category_id:{val}')
-        elif isinstance(val, str):
-            # try name map
-            if val in name_map:
-                cid = name_map[val]
-                mapped.append({'key': key, 'value': val, 'mapped_to': cats_by_id[cid]['name']})
-            else:
-                mapped.append({'key': key, 'value': val, 'mapped_to': None})
-                notes.append(f'unknown_category_name:{val}')
-        else:
-            mapped.append({'key': key, 'value': val, 'mapped_to': None})
-            notes.append(f'unhandled_value_type:{type(val)}')
-
-    return {'file': str(p.name), 'found': mapped, 'notes': notes}
-
-
-def main():
-    cats_by_id, name_map = load_metadata()
-    report = {'summary': {'total_files': 0, 'errors': 0, 'questionable': 0}, 'files': []}
-
-    for p in sorted(JSON_DIR.glob('*.json')):
-        if p.name == METADATA_FILE.name:
-            continue
-        report['summary']['total_files'] += 1
-        res = check_file(p, cats_by_id, name_map)
-        if 'error' in res:
-            report['summary']['errors'] += 1
-            report['files'].append(res)
-            continue
-        # determine if questionable: any mapped_to is None or notes
-        questionable = False
-        for f in res.get('found', []):
-            if f.get('mapped_to') is None:
-                questionable = True
-        if res.get('notes'):
-            questionable = True
-        if questionable:
-            report['summary']['questionable'] += 1
-        report['files'].append(res)
-
-    # write JSON report
-    REPORT_JSON.write_text(json.dumps(report, indent=2))
-
-    # write MD summary
-    lines = []
-    lines.append('# Audit Report: JSON Categories')
-    lines.append('')
-    lines.append(f"- Total files scanned: {report['summary']['total_files']}")
-    lines.append(f"- Files with parse errors: {report['summary']['errors']}")
-    lines.append(f"- Files with questionable/missing categories: {report['summary']['questionable']}")
-    lines.append('')
-    lines.append('## Problematic files')
-    lines.append('')
-    for f in report['files']:
-        if f.get('notes') or any(x.get('mapped_to') is None for x in f.get('found', [])):
-            lines.append(f"- **{f['file']}**")
-            if 'error' in f:
-                lines.append(f"  - Error: {f['error']}")
-            if f.get('found'):
-                for found in f['found']:
-                    lines.append(f"  - Field `{found['key']}` => `{found['value']}` mapped_to: `{found.get('mapped_to')}`")
-            if f.get('notes'):
-                for n in f['notes']:
-                    lines.append(f"  - Note: {n}")
-            lines.append('')
-
-    REPORT_MD.write_text('\n'.join(lines))
-    print('Audit complete:')
-    print(f"  Total: {report['summary']['total_files']}")
-    print(f"  Questionable: {report['summary']['questionable']}")
-    print(f"  Errors: {report['summary']['errors']}")
-    print(f"Wrote: {REPORT_JSON} and {REPORT_MD}")
-
-
-if __name__ == '__main__':
-    main()
--- a/scripts/audit_json_category_semantic.py
+++ b/scripts/audit_json_category_semantic.py
@@ -1,180 +0,0 @@
-#!/usr/bin/env python3
-import json
-from pathlib import Path
-import re
-from collections import Counter
-
-ROOT = Path(__file__).resolve().parents[1]
-JSON_DIR = ROOT / 'frontend' / 'public' / 'json'
-METADATA_FILE = JSON_DIR / 'metadata.json'
-REPORT_JSON = JSON_DIR / 'semantic_audit_report.json'
-REPORT_MD = JSON_DIR / 'semantic_audit_report.md'
-
-STOPWORDS = set(["the","and","of","in","a","to","with","for","on","is","an","by","as","or","all","tools","solutions","manage","management","system","systems","service","services"])
-
-
-def tokens(text):
-    if not text:
-        return []
-    text = text.lower()
-    text = re.sub(r"[^a-z0-9]+", " ", text)
-    toks = [t for t in text.split() if t and t not in STOPWORDS and len(t) > 1]
-    return toks
-
-
-def load_categories():
-    md = json.loads(METADATA_FILE.read_text())
-    cats = {}
-    for c in md.get('categories', []):
-        try:
-            cid = int(c.get('id'))
-        except Exception:
-            continue
-        name = c.get('name','')
-        desc = c.get('description','')
-        kt = set(tokens(name) + tokens(desc))
-        # also add raw name token
-        cats[cid] = {'id': cid, 'name': name, 'desc': desc, 'keywords': kt}
-    return cats
-
-
-def score_text_against_category(text_tokens, cat_keywords):
-    if not text_tokens or not cat_keywords:
-        return 0
-    cnt = 0
-    for t in text_tokens:
-        if t in cat_keywords:
-            cnt += 1
-    # simple score: count
-    return cnt
-
-
-def analyze_file(p, cats):
-    try:
-        obj = json.loads(p.read_text())
-    except Exception as e:
-        return {'file': p.name, 'error': f'parse_error: {e}'}
-
-    # if the JSON is not an object (e.g., array of versions), we cannot determine category
-    if not isinstance(obj, dict):
-        return {'file': p.name, 'found': [], 'questionable': True, 'reasons': ['no_category_field']}
-
-    # gather text
-    parts = []
-    for k in ['name','description','slug','type','documentation','website']:
-        v = obj.get(k)
-        if isinstance(v, list):
-            parts.extend([str(x) for x in v if x])
-        elif v:
-            parts.append(str(v))
-    # include install script path and notes
-    for k in ['script','install_methods','notes','tags']:
-        v = obj.get(k)
-        if not v:
-            continue
-        if isinstance(v, list):
-            for item in v:
-                parts.append(json.dumps(item) if isinstance(item, (dict,list)) else str(item))
-        elif isinstance(v, dict):
-            parts.append(json.dumps(v))
-        else:
-            parts.append(str(v))
-
-    text = " ".join(parts)
-    tks = tokens(text)
-    if not tks:
-        return {'file': p.name, 'found': [], 'notes': ['no_text_to_analyze']}
-
-    scores = []
-    for cid, c in cats.items():
-        sc = score_text_against_category(tks, c['keywords'])
-        if sc > 0:
-            scores.append({'id': cid, 'name': c['name'], 'score': sc})
-    scores = sorted(scores, key=lambda x: (-x['score'], x['name']))
-
-    # determine current categories
-    current = []
-    raw = obj.get('categories') or obj.get('category')
-    if isinstance(raw, list):
-        current = raw
-    elif raw is not None:
-        current = [raw]
-
-    # normalize to ints where possible
-    normalized_current = []
-    for v in current:
-        try:
-            normalized_current.append(int(v))
-        except Exception:
-            # maybe it's a name; try to match by name
-            for cid,c in cats.items():
-                if isinstance(v,str) and v.strip().lower() == c['name'].lower():
-                    normalized_current.append(cid)
-                    break
-
-    # decide if questionable
-    questionable = False
-    reasons = []
-    if not normalized_current:
-        questionable = True
-        reasons.append('no_category_assigned')
-    else:
-        # if none of current in top 3 suggestions and top suggestion has score>0
-        top_ids = [s['id'] for s in scores[:3]]
-        if scores and all(cid not in top_ids for cid in normalized_current):
-            questionable = True
-            reasons.append('assigned_not_in_top_suggestions')
-
-    return {'file': p.name, 'current': normalized_current, 'suggestions': scores[:5], 'questionable': questionable, 'reasons': reasons}
-
-
-def main():
-    cats = load_categories()
-    report = {'summary': {'total': 0, 'questionable': 0, 'errors': 0}, 'files': []}
-    for p in sorted(JSON_DIR.glob('*.json')):
-        if p.name == METADATA_FILE.name:
-            continue
-        report['summary']['total'] += 1
-        res = analyze_file(p, cats)
-        if 'error' in res:
-            report['summary']['errors'] += 1
-        if res.get('questionable'):
-            report['summary']['questionable'] += 1
-        report['files'].append(res)
-
-    REPORT_JSON.write_text(json.dumps(report, indent=2))
-
-    lines = []
-    lines.append('# Semantic Audit Report: Category Suggestions')
-    lines.append('')
-    lines.append(f"- Total files scanned: {report['summary']['total']}")
-    lines.append(f"- Files with parse errors: {report['summary']['errors']}")
-    lines.append(f"- Files flagged as questionable: {report['summary']['questionable']}")
-    lines.append('')
-    lines.append('## Flagged files and suggestions')
-    lines.append('')
-    for f in report['files']:
-        if f.get('questionable') or f.get('error'):
-            lines.append(f"- **{f['file']}**")
-            if f.get('error'):
-                lines.append(f"  - Error: {f['error']}")
-            if f.get('current'):
-                lines.append(f"  - Current categories: {f['current']}")
-            if f.get('suggestions'):
-                for s in f['suggestions']:
-                    lines.append(f"  - Suggestion: {s['id']} {s['name']} (score={s['score']})")
-            if f.get('reasons'):
-                for r in f['reasons']:
-                    lines.append(f"  - Reason: {r}")
-            lines.append('')
-
-    REPORT_MD.write_text('\n'.join(lines))
-    print('Semantic audit complete:')
-    print(f"  Total: {report['summary']['total']}")
-    print(f"  Questionable: {report['summary']['questionable']}")
-    print(f"  Errors: {report['summary']['errors']}")
-    print(f"Wrote: {REPORT_JSON} and {REPORT_MD}")
-
-
-if __name__ == '__main__':
-    main()