# Each non-comment line below is one Perl regular expression; text matching
# a pattern is masked before spell checking. Comments must be on their own
# line starting with `#` -- a pattern sharing a line with a comment is ignored.

# patch hunk comments
^\@\@ -\d+(?:,\d+|) \+\d+(?:,\d+|) \@\@ .*
# git index header
index [0-9a-z]{7,40}\.\.[0-9a-z]{7,40}

# cid urls
(['"])cid:.*?\g{-1}

# data urls
\(data:.*?\)
(['"])data:.*?\g{-1}
data:[-a-zA-Z=;:/0-9+]*,\S*

# mailto urls
mailto:[-a-zA-Z=;:/?%&0-9+]*

# magnet urls
magnet:[?=:\w]+

# ANSI color codes
\\u001b\[\d+(?:;\d+|)m

# URL escaped characters
\%[0-9A-F]{2}
# IPv6
\b(?:[0-9a-f]{0,4}:){5}[0-9a-f]{0,4}\b
# c99 hex digits (not the full format, just one I've seen)
0x[0-9a-fA-F](?:\.[0-9a-fA-F]*|)[pP]
# Punycode
\bxn--[-0-9a-z]+
# sha256
sha256:[0-9a-f]+
# sha-... -- uses a fancy capture
# (the second alternative is the HTML-escaped double quote; a bare `"` there
# would be redundant with the character class)
(['"]|&quot;)[0-9a-f]{40,}\g{-1}
# hex in url queries
=[0-9a-fA-F]+&
# ssh
(?:ssh-\S+|-nistp256) [-a-zA-Z=;:/0-9+]*
# PGP
\b(?:[0-9A-F]{4} ){9}[0-9A-F]{4}\b
# uuid:
[<({"'>][0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}[<'"})>]
# hex digits including css/html color classes:
(?:[\\0][xX]|\\u|[uU]\+|#x?|\%23)[0-9a-fA-FgGrR_]{2,}(?:[uUlL]{0,3}|u\d+)\b
# integrity
integrity="sha\d+-[-a-zA-Z=;:/0-9+]{40,}"

# .desktop mime types
^MimeTypes?=.*$
# .desktop localized entries
^[A-Z][a-z]+\[[a-z]+\]=.*$

# IServiceProvider
\bI(?=(?:[A-Z][a-z]{2,})+\b)

# crypt
"\$2[ayb]\$.{56}"

# Input to GitHub JSON
content: "[-a-zA-Z=;:/0-9+]*="

# Python stringprefix / binaryprefix
\b(?:B|BR|Br|F|FR|Fr|R|RB|RF|Rb|Rf|U|UR|Ur|b|bR|br|f|fR|fr|r|rB|rF|rb|rf|u|uR|ur)'

# Regular expressions for (P|p)assword
\([A-Z]\|[a-z]\)[a-z]+

# JavaScript regular expressions
/.*/[gim]*\.test\(
\.replace\(/[^/]*/[gim]*\s*,

# Go regular expressions
regexp\.MustCompile\(`[^`]*`\)

# kubernetes pod status lists
# https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase
\w+(?:-\w+)+\s+\d+/\d+\s+(?:Running|Pending|Succeeded|Failed|Unknown)\s+

# kubectl - pods in CrashLoopBackOff
\w+-[0-9a-f]+-\w+\s+\d+/\d+\s+CrashLoopBackOff\s+

# posthog secrets
posthog\.init\((['"])phc_[^"',]+\g{-1},

# Update Lorem based on your content (requires `ge` and `w` from https://github.com/jsoref/spelling; and `review`
# from https://github.com/check-spelling/check-spelling/wiki/Looking-for-items-locally )
# grep lorem .github/actions/spelling/patterns.txt|perl -pne 's/.*i..\?://;s/\).*//' |tr '|' "\n"|sort -f |xargs -n1 ge|perl -pne 's/^[^:]*://'|sort -u|w|sed -e 's/ .*//'|w|review -
# Warning, while `(?i)` is very neat and fancy, if you have some binary files that aren't proper unicode, you might run into:
## Operation "substitution (s///)" returns its argument for non-Unicode code point 0x1C19AE (the code point will vary).
## You could manually change `(?i)X...` to use `[Xx]...`
## or you could add the files to your `excludes` file (a version after 0.0.19 should identify the file path)
# Lorem
# (masks the whole run of words, whitespace, commas and periods around any listed filler word)
(?:\w|\s|[,.])*\b(?i)(?:amet|consectetur|cursus|dolor|eros|ipsum|lacus|libero|ligula|lorem|magna|neque|nulla|suscipit|tempus)\b(?:\w|\s|[,.])*

# Non-English
# (any word containing at least one of the listed accented / non-ASCII letters)
[a-zA-Z]*[ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŸŽž][a-zA-ZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŸŽž]*

# French
# This corpus only had capital letters, but you probably want lowercase ones as well.
\b[LN]'+[a-z]+\b

# the negative lookahead here is to allow catching 'templatesz' as a misspelling
# but to otherwise recognize a Windows path with \templates\foo.template or similar:
\\templates(?![a-z])
# ignore long runs of a single character:
\b([A-Za-z])\g{-1}{3,}\b
# Note that the next example is no longer necessary if you are using
# to match a string starting with a `#`, use a character-class:
[#]backwards
# version suffix v#
[Vv]\d+(?:\b|(?=[a-zA-Z_]))
# Compiler flags
[\t >"'`=(](?:-J|)-[DPWXY]
[\t "'`=(]-[DPWXYLlf]
,-B
# curl arguments
\b(?:\\n|)curl(?:\s+-[a-zA-Z]+)+
# set arguments
\bset\s+-[abefiuox]+\b
# tar arguments
\b(?:\\n|)tar(?:\s+-[a-zA-Z]+|\s[a-z]+)+
# macOS temp folders
/var/folders/\w\w/[+\w]+/(?:T|-Caches-)/

# ignore hex colors
(?:[\\0][xX]|\\u|[uU]\+|#x?|\%23)[0-9a-fA-FgGrR_]{2,}(?:[uUlL]{0,3}|u\d+)\b

# ignore imports
import(?:["'\s]*([\w*{}\n, ]+)from\s*)?["'\s]*([@\w/_-]+)["'\s]*;?

# ignore URLs
https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)\b

# ignore tags in jsx
<[a-zA-Z0-9]*? \b

# ignore file paths
\/[-a-zA-Z0-9@:%._\+~#=]*\.[a-zA-Z0-9()]{1,6}\b

# ignore blockchain account addresses
# NOTE(review): anchored with ^...$, so this only matches when the address is
# the entire line, and the trailing \b is redundant after $ -- confirm whether
# \b0x[a-fA-F0-9]{40}\b was intended.
^0x[a-fA-F0-9]{40}$\b