Mirror of https://github.com/community-scripts/ProxmoxVE.git (synced 2025-12-27 09:26:25 +01:00)
chore(audit): remove non-JSON files from PR
@@ -1,10 +0,0 @@
# Audit Report: JSON Categories

- Total files scanned: 432
- Files with parse errors: 0
- Files with questionable/missing categories: 1

## Problematic files

- **versions.json**
 - Note: no_category_field
File diff suppressed because it is too large
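Both removed scripts read `frontend/public/json/metadata.json` and index its `categories` array by numeric `id`. A minimal sketch of the shape they assume follows; the two category entries are invented for illustration, and only the `id`/`name`/`description` field names come from the scripts themselves.

```python
# Minimal sketch of the category index the removed scripts build from
# metadata.json. The category entries here are made-up examples; only the
# id/name/description field names are taken from the scripts.
import json

metadata = {
    "categories": [
        {"id": 0, "name": "Miscellaneous", "description": "Anything that does not fit another category"},
        {"id": 1, "name": "Monitoring", "description": "Metrics, alerting and observability"},
    ]
}

# id -> category dict, as built in load_metadata()/load_categories()
cats_by_id = {int(c["id"]): c for c in metadata.get("categories", [])}
# lowercased name -> id, as built in load_metadata()
name_map = {c["name"].lower(): int(c["id"])
            for c in metadata.get("categories", []) if "name" in c and "id" in c}

print(json.dumps({"ids": sorted(cats_by_id), "names": sorted(name_map)}, indent=2))
```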
@@ -1,160 +0,0 @@
#!/usr/bin/env python3
import json
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
JSON_DIR = ROOT / 'frontend' / 'public' / 'json'
METADATA_FILE = JSON_DIR / 'metadata.json'
REPORT_MD = JSON_DIR / 'audit_category_report.md'
REPORT_JSON = JSON_DIR / 'audit_category_report.json'


def load_metadata():
    with METADATA_FILE.open() as f:
        md = json.load(f)
    cats = {}
    for c in md.get('categories', []):
        try:
            cid = int(c.get('id'))
        except Exception:
            continue
        cats[cid] = c

    # Also create name->id map (lowercased)
    name_map = {c.get('name', '').lower(): int(c.get('id')) for c in md.get('categories', []) if 'name' in c and 'id' in c}
    return cats, name_map


def normalize_value(v):
    if v is None:
        return None
    if isinstance(v, (int, float)):
        return int(v)
    if isinstance(v, str):
        s = v.strip()
        if s.isdigit():
            return int(s)
        return s.lower()
    return v


def check_file(p, cats_by_id, name_map):
    try:
        j = json.loads(p.read_text())
    except Exception as e:
        return {'file': str(p.name), 'error': f'json_parse_error: {e}'}

    found = []
    notes = []

    # look for common keys
    keys_to_check = ['category_id', 'category', 'categories']
    for key in keys_to_check:
        if key in j:
            val = j[key]
            if isinstance(val, list):
                for item in val:
                    nv = normalize_value(item)
                    found.append((key, nv))
            else:
                nv = normalize_value(val)
                found.append((key, nv))

    # also check top-level keys that might indicate category
    if not found:
        for alt in ['tags', 'type']:
            if alt in j:
                val = j[alt]
                if isinstance(val, list):
                    for item in val:
                        found.append((alt, normalize_value(item)))
                else:
                    found.append((alt, normalize_value(val)))

    if not found:
        notes.append('no_category_field')
        return {'file': str(p.name), 'found': [], 'notes': notes}

    mapped = []
    for key, val in found:
        if isinstance(val, int):
            if val in cats_by_id:
                mapped.append({'key': key, 'value': val, 'mapped_to': cats_by_id[val]['name']})
            else:
                mapped.append({'key': key, 'value': val, 'mapped_to': None})
                notes.append(f'unknown_category_id:{val}')
        elif isinstance(val, str):
            # try name map
            if val in name_map:
                cid = name_map[val]
                mapped.append({'key': key, 'value': val, 'mapped_to': cats_by_id[cid]['name']})
            else:
                mapped.append({'key': key, 'value': val, 'mapped_to': None})
                notes.append(f'unknown_category_name:{val}')
        else:
            mapped.append({'key': key, 'value': val, 'mapped_to': None})
            notes.append(f'unhandled_value_type:{type(val)}')

    return {'file': str(p.name), 'found': mapped, 'notes': notes}


def main():
    cats_by_id, name_map = load_metadata()
    report = {'summary': {'total_files': 0, 'errors': 0, 'questionable': 0}, 'files': []}

    for p in sorted(JSON_DIR.glob('*.json')):
        if p.name == METADATA_FILE.name:
            continue
        report['summary']['total_files'] += 1
        res = check_file(p, cats_by_id, name_map)
        if 'error' in res:
            report['summary']['errors'] += 1
            report['files'].append(res)
            continue
        # determine if questionable: any mapped_to is None or notes
        questionable = False
        for f in res.get('found', []):
            if f.get('mapped_to') is None:
                questionable = True
        if res.get('notes'):
            questionable = True
        if questionable:
            report['summary']['questionable'] += 1
        report['files'].append(res)

    # write JSON report
    REPORT_JSON.write_text(json.dumps(report, indent=2))

    # write MD summary
    lines = []
    lines.append('# Audit Report: JSON Categories')
    lines.append('')
    lines.append(f"- Total files scanned: {report['summary']['total_files']}")
    lines.append(f"- Files with parse errors: {report['summary']['errors']}")
    lines.append(f"- Files with questionable/missing categories: {report['summary']['questionable']}")
    lines.append('')
    lines.append('## Problematic files')
    lines.append('')
    for f in report['files']:
        if f.get('notes') or any(x.get('mapped_to') is None for x in f.get('found', [])):
            lines.append(f"- **{f['file']}**")
            if 'error' in f:
                lines.append(f" - Error: {f['error']}")
            if f.get('found'):
                for found in f['found']:
                    lines.append(f" - Field `{found['key']}` => `{found['value']}` mapped_to: `{found.get('mapped_to')}`")
            if f.get('notes'):
                for n in f['notes']:
                    lines.append(f" - Note: {n}")
            lines.append('')

    REPORT_MD.write_text('\n'.join(lines))
    print('Audit complete:')
    print(f" Total: {report['summary']['total_files']}")
    print(f" Questionable: {report['summary']['questionable']}")
    print(f" Errors: {report['summary']['errors']}")
    print(f"Wrote: {REPORT_JSON} and {REPORT_MD}")


if __name__ == '__main__':
    main()
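This is the path that produced the `versions.json` entry in the report above: when a file exposes none of the probed keys (`category_id`, `category`, `categories`, then `tags`/`type`), `check_file()` returns an empty `found` list plus a `no_category_field` note. A condensed, self-contained sketch of that path; the sample payload is invented and the key probing is simplified into a single loop.

```python
# Condensed sketch of the no_category_field path in check_file().
# The sample payload is hypothetical; the probing order mirrors the script,
# collapsed into one loop for brevity.
import json

sample = json.loads('[{"name": "some-app", "version": "1.2.3", "date": "2025-01-01"}]')

found = []
for key in ['category_id', 'category', 'categories', 'tags', 'type']:
    if isinstance(sample, dict) and key in sample:
        found.append((key, sample[key]))

result = {'file': 'versions.json', 'found': found,
          'notes': [] if found else ['no_category_field']}
print(result)  # {'file': 'versions.json', 'found': [], 'notes': ['no_category_field']}
```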
@@ -1,180 +0,0 @@
#!/usr/bin/env python3
import json
from pathlib import Path
import re
from collections import Counter

ROOT = Path(__file__).resolve().parents[1]
JSON_DIR = ROOT / 'frontend' / 'public' / 'json'
METADATA_FILE = JSON_DIR / 'metadata.json'
REPORT_JSON = JSON_DIR / 'semantic_audit_report.json'
REPORT_MD = JSON_DIR / 'semantic_audit_report.md'

STOPWORDS = set(["the", "and", "of", "in", "a", "to", "with", "for", "on", "is", "an", "by", "as", "or", "all", "tools", "solutions", "manage", "management", "system", "systems", "service", "services"])


def tokens(text):
    if not text:
        return []
    text = text.lower()
    text = re.sub(r"[^a-z0-9]+", " ", text)
    toks = [t for t in text.split() if t and t not in STOPWORDS and len(t) > 1]
    return toks


def load_categories():
    md = json.loads(METADATA_FILE.read_text())
    cats = {}
    for c in md.get('categories', []):
        try:
            cid = int(c.get('id'))
        except Exception:
            continue
        name = c.get('name', '')
        desc = c.get('description', '')
        kt = set(tokens(name) + tokens(desc))
        # also add raw name token
        cats[cid] = {'id': cid, 'name': name, 'desc': desc, 'keywords': kt}
    return cats


def score_text_against_category(text_tokens, cat_keywords):
    if not text_tokens or not cat_keywords:
        return 0
    cnt = 0
    for t in text_tokens:
        if t in cat_keywords:
            cnt += 1
    # simple score: count
    return cnt


def analyze_file(p, cats):
    try:
        obj = json.loads(p.read_text())
    except Exception as e:
        return {'file': p.name, 'error': f'parse_error: {e}'}

    # if the JSON is not an object (e.g., array of versions), we cannot determine category
    if not isinstance(obj, dict):
        return {'file': p.name, 'found': [], 'questionable': True, 'reasons': ['no_category_field']}

    # gather text
    parts = []
    for k in ['name', 'description', 'slug', 'type', 'documentation', 'website']:
        v = obj.get(k)
        if isinstance(v, list):
            parts.extend([str(x) for x in v if x])
        elif v:
            parts.append(str(v))
    # include install script path and notes
    for k in ['script', 'install_methods', 'notes', 'tags']:
        v = obj.get(k)
        if not v:
            continue
        if isinstance(v, list):
            for item in v:
                parts.append(json.dumps(item) if isinstance(item, (dict, list)) else str(item))
        elif isinstance(v, dict):
            parts.append(json.dumps(v))
        else:
            parts.append(str(v))

    text = " ".join(parts)
    tks = tokens(text)
    if not tks:
        return {'file': p.name, 'found': [], 'notes': ['no_text_to_analyze']}

    scores = []
    for cid, c in cats.items():
        sc = score_text_against_category(tks, c['keywords'])
        if sc > 0:
            scores.append({'id': cid, 'name': c['name'], 'score': sc})
    scores = sorted(scores, key=lambda x: (-x['score'], x['name']))

    # determine current categories
    current = []
    raw = obj.get('categories') or obj.get('category')
    if isinstance(raw, list):
        current = raw
    elif raw is not None:
        current = [raw]

    # normalize to ints where possible
    normalized_current = []
    for v in current:
        try:
            normalized_current.append(int(v))
        except Exception:
            # maybe it's a name; try to match by name
            for cid, c in cats.items():
                if isinstance(v, str) and v.strip().lower() == c['name'].lower():
                    normalized_current.append(cid)
                    break

    # decide if questionable
    questionable = False
    reasons = []
    if not normalized_current:
        questionable = True
        reasons.append('no_category_assigned')
    else:
        # if none of current in top 3 suggestions and top suggestion has score>0
        top_ids = [s['id'] for s in scores[:3]]
        if scores and all(cid not in top_ids for cid in normalized_current):
            questionable = True
            reasons.append('assigned_not_in_top_suggestions')

    return {'file': p.name, 'current': normalized_current, 'suggestions': scores[:5], 'questionable': questionable, 'reasons': reasons}


def main():
    cats = load_categories()
    report = {'summary': {'total': 0, 'questionable': 0, 'errors': 0}, 'files': []}
    for p in sorted(JSON_DIR.glob('*.json')):
        if p.name == METADATA_FILE.name:
            continue
        report['summary']['total'] += 1
        res = analyze_file(p, cats)
        if 'error' in res:
            report['summary']['errors'] += 1
        if res.get('questionable'):
            report['summary']['questionable'] += 1
        report['files'].append(res)

    REPORT_JSON.write_text(json.dumps(report, indent=2))

    lines = []
    lines.append('# Semantic Audit Report: Category Suggestions')
    lines.append('')
    lines.append(f"- Total files scanned: {report['summary']['total']}")
    lines.append(f"- Files with parse errors: {report['summary']['errors']}")
    lines.append(f"- Files flagged as questionable: {report['summary']['questionable']}")
    lines.append('')
    lines.append('## Flagged files and suggestions')
    lines.append('')
    for f in report['files']:
        if f.get('questionable') or f.get('error'):
            lines.append(f"- **{f['file']}**")
            if f.get('error'):
                lines.append(f" - Error: {f['error']}")
            if f.get('current'):
                lines.append(f" - Current categories: {f['current']}")
            if f.get('suggestions'):
                for s in f['suggestions']:
                    lines.append(f" - Suggestion: {s['id']} {s['name']} (score={s['score']})")
            if f.get('reasons'):
                for r in f['reasons']:
                    lines.append(f" - Reason: {r}")
            lines.append('')

    REPORT_MD.write_text('\n'.join(lines))
    print('Semantic audit complete:')
    print(f" Total: {report['summary']['total']}")
    print(f" Questionable: {report['summary']['questionable']}")
    print(f" Errors: {report['summary']['errors']}")
    print(f"Wrote: {REPORT_JSON} and {REPORT_MD}")


if __name__ == '__main__':
    main()
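The scoring in this second script is a plain token-overlap count: a file's text is tokenised, stopwords and one-character tokens are dropped, and each category earns one point per token that appears in its keyword set. A standalone sketch of that scoring follows; the category keyword sets and the app description are invented examples, while the tokenisation and the count-based score mirror `tokens()` and `score_text_against_category()` above.

```python
# Standalone sketch of the token-overlap scoring used by the semantic audit
# script. Category keywords and the description are invented examples; the
# tokenisation and counting mirror tokens() and score_text_against_category().
import re

STOPWORDS = {"the", "and", "of", "in", "a", "to", "with", "for", "on", "is",
             "an", "by", "as", "or", "all", "tools", "solutions", "manage",
             "management", "system", "systems", "service", "services"}

def tokens(text):
    text = re.sub(r"[^a-z0-9]+", " ", text.lower())
    return [t for t in text.split() if t not in STOPWORDS and len(t) > 1]

def score(text_tokens, cat_keywords):
    return sum(1 for t in text_tokens if t in cat_keywords)

# Hypothetical category keyword sets (these would normally come from metadata.json)
categories = {
    "Monitoring": set(tokens("Monitoring") + tokens("Metrics, alerting and observability")),
    "Media": set(tokens("Media") + tokens("Streaming, video and music servers")),
}

description = "A lightweight metrics and alerting server with Grafana dashboards"
tks = tokens(description)
for name, kw in categories.items():
    print(name, score(tks, kw))
# "Monitoring" scores higher because "metrics" and "alerting" overlap its keywords.
```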