From 26b5465f53dd719cd8682d4c9ea5e964d6a05757 Mon Sep 17 00:00:00 2001 From: s4luorth Date: Sat, 7 Feb 2026 14:03:54 +0100 Subject: [PATCH] final --- .../__pycache__/app.cpython-310.pyc | Bin 14886 -> 15365 bytes website-checker/app.py | 23 + website-checker/templates/index.html | 1476 +++++------------ 3 files changed, 472 insertions(+), 1027 deletions(-) diff --git a/website-checker/__pycache__/app.cpython-310.pyc b/website-checker/__pycache__/app.cpython-310.pyc index fc5dde59339447dca690d16cee8e4a3eef7f32a9..6611d46d1aa0232eeca2d9bf097ee8f812208a20 100644 GIT binary patch delta 2824 zcmZWrdrX_x75BY<0LI1!Ou#%1X#&PTVjck!LdwG=Bm@|krg4(Fj_(JU7#prXCxJjr zUd`HyHO*DBPFYiEtE#D@Dw36S$vSOP)-+lY?SFUDv}x)lO{y|!>!eCucg}@8%CPS5 zp8Gq`d%p91;zRY^@vNxXY}CNtH}&U3bpKmf#pKpot}heg8kY4w@l`0=rS;6ROnfge zt72!_ihWeaa#-$Joo@rPJ!SIPI$$=gGL?$mt^vDzRsd`j>zBM;af1Jf&cF&;(OJE( zniWHgmB^)|23EpK!NbK8Wk5S%mBXrlg^r~V)X3Je7r>;3ZBR;A%h|{(fvIIxVCP)1 zW7Vt4&?lX_SPfXzJy!-Mt7Ub-Zep9j53llEz69I6TB(V-Sv{C-RthHEtU+b;Pg;N7epct{O#cCZ&EzeVyL zz$aD|ovceLnx&%qse_00NWMjBWjl}13xw}iwz6Gk2+_daySnd|yLn=pQo#1GKDKvJ zOEioQ%jF!1&%t9i_5~ti@xYkkjYh+ceahijNL3tN$;Y&YCfd`$!dKMi@eKJqyNE)z+}Sef))FVxSM~62ywC?zbLF4Cem2?oQY*Hnx6E0VjLaO;EK}IC7+t3G);X)1c~rwMOedt)!nD97L+BjA;vv5i{(I8%N5N#^b9#o;GBz9T+uMwDi3ecX|hif z$`s#D9)qT4wHGr}ao&k?SO9hI}@cR`%h@A7&F9ehdLt6bK<3CtZ4t{Skuhi0;x zcTl=29#m~NUqx{pfRpf_ibQK(dN?q7m<8Iu7WK|cPK+m$^h+bY_{4nWn2*O!fN#WO z=LoqfdaC1A+3$@&rnE7Y^NZp^brrcR{^2ebR+rIm4SlW(hij0W6w|H}a!S1E>I3B7@{bVEjyEGD5KbXr51w5rr4RDJv9O+s{!KOHv*d|Q-Nbqu zO>j{72MBk>A2!eCzYU^O$A5;5eDc3Q>6&=OT}@`h8}15nU3}tpT5bZJ(#O;QSDhLB zmiP)ZmqkW>B`Fc^`VMke#Ok974E*i7VI>xWPoN_b3XZFeF=aF!VM=68j%(Vr1mz~o zT@K)+{4&Zn5Y}AHIHst6C`66NI?>qj52sfOx%8ULEgUAbh zB))B^DVL-?%V&8H)bJRd<+~Hr{(vPry5&{yer4pQ0&w8%~GTB z*@??$ehxN0PU15ukyxB7{y;1k3W3u{SeGm&j!_KHuFq$HNai|_@)eS}AXwYI@$ZMoH@OV=7R#lzNe(ks4hZTTPNhUk0% delta 2191 zcmZ9LZE#an8pre8q)AQla$6~FXqq-DrA?Z?k+d{cK&-O7YT1Hl-Q7;M=N1y$q@3JR zT2oS?>aJR*>{FIiK#Mvs;0!Z6(GgK}bdhn_@xy-FnYcTCV|Tyom-UOF;`7{g3u`jx 
zcc1e=&)Yrc+~*FT*Ij9+({7^Qo%$CNu{|eTUU=(7(^om+(mcYgd4?=P6{Rzl{(vZ3 zR_+%oNU2*=R%;%QiTujNN>b~^4nwcd)=>Q7npIQ^&rDH&gIGm57Bl;XtfESIDWXwS zlj@^UL!*|4Tg$E1e3aBK{31XuK~bmqmfBe@>PZQSHRRYp1;|e{E*0aLVi6P}@@TrJ z4KCIs!lZ^pGsW?%+*`0^sZob$6>a3zoYN|kEuvjpA=b_+BBGTIDIzL5W-P~`zg4ST zXcL|1U`bymx(vPD&|}N`1ESl|*OJ~M9wePgQ=au=gP}(ZedDtJkl1ACQPOv{h=-RK zdc-5fLWkBV9zCGUf$Y`RiOt6VO!WNO(sPUPbSAsBO0iXJ6WeFaU=m87F@ihuHiQ=q7+$Zd_ z<|${^nJgA12NXGnfAo~w+*9tXTR5?1RZY%y$Ry9|CBjWUj?<>I$}uz1v`LgM z>2$V7VXrAwZZT1IXVNwCF_@-QFiqOXyDY2zFIL~8N${mNrqR1oUC9~-By>Bg89W z&9a)q!J73D!E-epn8|-ovk^)q>p&=2hqcr`eLoia<8Ta<{zm9U^!LVc?9of;w+*IK z+_`L_S+H0du`lrQZy3L29A`{3a_9}zmCcYzuFtV@knue33IrgFdY}h3;gvu$%;C+z z*Kh)F)YW>PqE<34@+(&84;IA^4jK~kck2FW@pGM^Sw72&jxwB#5ymOTD`*Q&I?s|h zS+r&M?%rDY8XgaxE1D(cO^k*fR~OiAnsJ(O1~IhWd4|oEgdoV5@gE^C{0(n~PL^^a zBl}~&Y&wirW%{QhTIC~j4tVRVYm_I@x$;M>je%zgZ~Qm!X(C; zt6>WFHt&Fa_*U~*kjLqk!L9FbNk#G%?pJqfBcsD{UHiF~8BM1$8uh~Kit?mqd3r3?>Rv#?NO8?*p?m^O5`|RY-?yjR=8n#iDm0z zja@9TpIpayhB1Y=+dRsD09Uq$;cC98{Si>#;{}!R2IC5zT02?sdoqI-d6E_5BD~7h z1#FCjU>y4*0l0*RB0+bap3beeK10p>*Fd#o?cdjjm7#}eXGp^xxu{k=#*#}}nIt!JI5TRfhPEhJjp1;r7 z#z-)XFN42+Ima;W9p3=s%;dXl9b#N%e8TvQaf9&%ZtQ*#j$*dE3E%Ga!$bIGch`?8 CNjB5~ diff --git a/website-checker/app.py b/website-checker/app.py index c88e863..f9e4403 100644 --- a/website-checker/app.py +++ b/website-checker/app.py @@ -35,6 +35,20 @@ IGNORE_SELECTORS = [ CHUNK_SIZE = 9000 # stay under 10k limit +# LanguageTool rule IDs to silently ignore +IGNORED_RULES = { + "LEERZEICHEN_VOR_SATZZEICHEN", + "WHITESPACE_RULE", + "WHITESPACE_BEFORE_PUNCTUATION", + "LEERZEICHEN_VOR_DOPPELPUNKT", + "LEERZEICHEN_VOR_SEMIKOLON", + "LEERZEICHEN_VOR_AUSRUFEZEICHEN", + "LEERZEICHEN_VOR_FRAGEZEICHEN", +} + +# Regex: space(s) directly before punctuation — catch any remaining cases +SPACE_BEFORE_PUNCT_RE = re.compile(r"\s+[,.:;!?\"\u201C\u201D\u201E\u201F]") + def normalize_url(url: str) -> str: """Remove fragment and trailing slash for dedup.""" @@ -289,6 +303,15 @@ def check_text_with_languagetool( 
if resp.status_code == 200: result = resp.json() for match in result.get("matches", []): + rule_id = match.get("rule", {}).get("id", "") + if rule_id in IGNORED_RULES: + continue + # Extra filter: skip any match that is just whitespace before punctuation + m_off = match.get("offset", 0) + m_len = match.get("length", 0) + matched_text = chunk_text[m_off:m_off + m_len] + if SPACE_BEFORE_PUNCT_RE.fullmatch(matched_text): + continue match["offset"] += offset all_matches.append(match) elif resp.status_code in (401, 403): diff --git a/website-checker/templates/index.html b/website-checker/templates/index.html index b349a10..3505bfe 100644 --- a/website-checker/templates/index.html +++ b/website-checker/templates/index.html @@ -6,517 +6,186 @@ LanguageTool Website Checker -

LanguageTool Website Checker

← Zurück zur Übersicht -
+
Rechtschreibung
Grammatik
Stil
@@ -524,87 +193,52 @@
- -
+
In Zwischenablage kopiert
- +
-
-
-

Website prüfen

-

Crawlt alle Seiten einer Website und prüft Texte mit LanguageTool.

- -
- - -
- -
- - -
- -
- - -
- -
-
- - -
-
- - -
-
- - -
+
+

Website prüfen

+

Crawlt alle Seiten einer Website und prüft Texte mit LanguageTool.

+
+
+
+
+
+
-
+ +
+
- +
-
-
-
-
Starte...
-
-
-
-
-
-
+
+
+
Starte...
+
+
+
- +
-
-
-

Gefundene Seiten

-
-
- - - -
-
-
-
- - -
+
+

Gefundene Seiten

+
+
+ + +
-
+
+
+
+ + +
+
@@ -612,6 +246,8 @@
+ +
@@ -621,544 +257,330 @@
-
+
+} +// ==================== UTILS ==================== +function esc(s) { return s ? s.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;') : ''; } +function escRx(s) { return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); } +document.querySelectorAll('#formView input').forEach(el => el.addEventListener('keydown', e => { if (e.key === 'Enter') startCheck(); })); +