Regex woes

Collapse
This topic is closed.
X
X
 
  • Time
  • Show
Clear All
new posts
  • Karch

    Regex woes

    I have these two methods that are chewing up a ton of CPU time in my
    application. Does anyone have any suggestions on how to optimize them or
    rewrite them without Regex? The most time-consuming operation by a long shot
    is the regex.Replace call. Basically, its only purpose is to remove spaces
    between opening/closing tags and the element name. Surely there is a better
    way.

    // Matches the start of an opening script tag, tolerating whitespace after '<'.
    // Built once and compiled: constructing a Regex on every call was the hot spot.
    private static readonly Regex s_scriptOpenTag =
        new Regex("<\\s*script", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // Matches a closing script tag, tolerating whitespace between '<' and '/'.
    private static readonly Regex s_scriptCloseTag =
        new Regex("<\\s*\\/script>", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Runs <c>FixupHTML</c> on the markup, escapes backslashes and double quotes,
    /// splits every script tag into <c>&lt;scr" + "ipt</c> / <c>&lt;/scr" + "ipt&gt;</c>,
    /// and strips line breaks and tabs.
    /// </summary>
    /// <param name="htmlCode">The HTML markup to convert.</param>
    /// <returns>The converted text on a single line, trimmed of surrounding whitespace.</returns>
    /// <remarks>
    /// NOTE(review): the output looks intended for embedding inside a double-quoted
    /// JavaScript string literal (hence the unescaped <c>" + "</c> inserted into the
    /// script tags after quotes were escaped) - confirm against the caller.
    /// </remarks>
    private string FixupJavascript(string htmlCode)
    {
        string result = FixupHTML(htmlCode);

        // Order matters: backslashes must be doubled before quotes are escaped,
        // otherwise the backslash added for each quote would itself be doubled.
        result = result.Replace("\\", "\\\\");
        result = result.Replace("\"", "\\\"");

        // These replacements deliberately insert raw quotes, so they run after escaping.
        result = s_scriptOpenTag.Replace(result, "<scr\" + \"ipt");
        result = s_scriptCloseTag.Replace(result, "</scr\" + \"ipt>");

        // Remove platform line breaks first, then any remaining bare "\n" and tabs.
        result = result.Replace(Environment.NewLine, string.Empty)
                       .Replace("\t", string.Empty)
                       .Replace("\n", string.Empty);

        return result.Trim();
    }

    // Patterns are built once and compiled; the original constructed four Regex
    // objects on every call, which dominated CPU time.

    // Opening or closing <html> tag, with optional whitespace after '<' and any attributes.
    private static readonly Regex s_htmlTag =
        new Regex("<\\s*\\/*html[^>]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // A <head ...> ... </head> section.
    // NOTE(review): '.' does not match '\n' without RegexOptions.Singleline, so a head
    // section that spans several lines is left untouched, and the greedy ".*" runs to
    // the LAST closing head tag on the line. Behaviour kept as posted - confirm intent.
    private static readonly Regex s_headSection =
        new Regex("<\\s*head.*>.*<\\s*\\/head[^>]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // Opening or closing <body> tag, with optional whitespace after '<' and any attributes.
    private static readonly Regex s_bodyTag =
        new Regex("<\\s*\\/*body[^>]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // "//" through end of line, unless preceded by ':' (so "http://" survives).
    // NOTE(review): the character before "//" is part of the match and is removed too,
    // and a "//" at the very start of the input is not matched. Behaviour kept as posted.
    // IgnoreCase is omitted because the pattern contains no letters.
    private static readonly Regex s_lineComment =
        new Regex("[^:]\\/\\/.*", RegexOptions.Compiled);

    /// <summary>
    /// Removes the <c>html</c> and <c>body</c> tags, any single-line <c>head</c> section,
    /// and <c>//</c>-to-end-of-line runs from the supplied markup.
    /// </summary>
    /// <param name="htmlCode">The HTML markup to clean up.</param>
    /// <returns>The markup with the matched fragments replaced by empty strings.</returns>
    private string FixupHTML(string htmlCode)
    {
        string result = htmlCode;
        result = s_htmlTag.Replace(result, string.Empty);
        result = s_headSection.Replace(result, string.Empty);
        result = s_bodyTag.Replace(result, string.Empty);
        result = s_lineComment.Replace(result, string.Empty);
        return result;
    }


Working...