regex - C# regular expression for finding links in <a> tags with a specific ending -


I need a regex pattern for finding links in a string (containing HTML code), limited to links whose file ending is .gif or .png.

example string:

<a href="//site.com/folder/picture.png" target="_blank">picture.png</a> 

I am looking for the value between the quotes (" ") of the href attribute, and the text between <a> and </a>.

i want this:

href = //site.com/folder/picture.png string = picture.png

My code so far:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Diagnostics;
using System.Drawing;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;

// NOTE(review): the namespace, form and control names below use the default
// Visual Studio casing; they must match the designer-generated half of this
// partial class - confirm against Form1.Designer.cs.
namespace Downloader
{
    /// <summary>
    /// Form that downloads the page whose address is typed into textBox1 and
    /// lists every anchor found in it in richTextBox1.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Downloads the HTML at the URL in textBox1 and appends the string
        /// form of every link found in it to richTextBox1.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            string url = textBox1.Text;
            string s = GetHtmlCode(url);
            foreach (LinkItem i in LinkFinder.Find(s))
            {
                richTextBox1.Text += Convert.ToString(i);
            }
        }

        /// <summary>Downloads the resource at <paramref name="url"/> as a string.</summary>
        /// <param name="url">Address to download.</param>
        /// <returns>The downloaded content.</returns>
        static string GetHtmlCode(string url)
        {
            using (WebClient client = new WebClient())
            {
                string htmlCode = client.DownloadString(url);
                return htmlCode;
            }
        }

        /// <summary>One anchor: its href attribute value and its tag-stripped text.</summary>
        public struct LinkItem
        {
            public string Href;
            public string Text;

            /// <summary>Returns the href and the text, each followed by a newline and a tab.</summary>
            public override string ToString()
            {
                return Href + "\n\t" + Text + "\n\t";
            }
        }

        /// <summary>Regex-based extraction of anchors from an HTML string.</summary>
        static class LinkFinder
        {
            /// <summary>
            /// Finds every <c>&lt;a ...&gt;...&lt;/a&gt;</c> span in <paramref name="file"/>.
            /// </summary>
            /// <param name="file">HTML text to scan.</param>
            /// <returns>
            /// One <see cref="LinkItem"/> per anchor, in document order. Href stays
            /// null when the anchor has no double-quoted href attribute.
            /// </returns>
            public static List<LinkItem> Find(string file)
            {
                List<LinkItem> list = new List<LinkItem>();

                // 1.
                // Find all matches in file.
                MatchCollection m1 = Regex.Matches(file, @"(<a.*?>.*?</a>)",
                    RegexOptions.Singleline);

                // 2.
                // Loop over each match.
                foreach (Match m in m1)
                {
                    string value = m.Groups[1].Value;
                    LinkItem i = new LinkItem();

                    // 3.
                    // Get the href attribute.
                    Match m2 = Regex.Match(value, @"href=\""(.*?)\""",
                        RegexOptions.Singleline);
                    if (m2.Success)
                    {
                        i.Href = m2.Groups[1].Value;
                    }

                    // 4.
                    // Remove the tags (and the whitespace around them) to leave only the text.
                    string t = Regex.Replace(value, @"\s*<.*?>\s*", "",
                        RegexOptions.Singleline);
                    i.Text = t;

                    list.Add(i);
                }
                return list;
            }
        }
    }
}

I can suggest using HtmlAgilityPack for this task. Install it using the Manage NuGet Packages for Solution menu, and add the following method:

/// <summary>
/// Collects the href attribute value and the node text of every &lt;a&gt; element
/// whose text ends with an image extension (.png, .jpg or .gif, case-insensitive).
/// </summary>
/// <param name="html">
/// Either an HTML string, or an absolute http/https URL whose page is downloaded and parsed.
/// </param>
/// <returns>
/// A key-value pair list: the key is the href value (null when the anchor has no
/// href attribute), the value is the node's inner text. Empty when nothing matches.
/// </returns>
// The method name is kept exactly as the usage snippets spell it.
private List<KeyValuePair<string, string>> getlinkswithhtmlagilitypack(string html)
{
    var result = new List<KeyValuePair<string, string>>();
    HtmlAgilityPack.HtmlDocument hap;
    Uri uriResult;

    // Treat the argument as an address only when it parses as an absolute web URL;
    // https is accepted as well, otherwise such a URL would be parsed as literal HTML.
    bool isWebUrl = Uri.TryCreate(html, UriKind.Absolute, out uriResult)
        && (uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps);

    if (isWebUrl)
    {
        // html is a URL
        var web = new HtmlAgilityPack.HtmlWeb();
        hap = web.Load(uriResult.AbsoluteUri);
    }
    else
    {
        // html is an HTML string
        hap = new HtmlAgilityPack.HtmlDocument();
        hap.LoadHtml(html);
    }

    // The original guarded against a null result from SelectNodes; keep that guard.
    var nodes = hap.DocumentNode.SelectNodes("//a");
    if (nodes == null)
    {
        return result;
    }

    foreach (var node in nodes)
    {
        // Ordinal, case-insensitive suffix test instead of Path.GetExtension + ToLower():
        // anchor text is not a file path (GetExtension can throw on characters that are
        // invalid in paths), and ToLower() is culture-sensitive.
        string text = node.InnerText.Trim();
        bool isImage = text.EndsWith(".png", StringComparison.OrdinalIgnoreCase)
            || text.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase)
            || text.EndsWith(".gif", StringComparison.OrdinalIgnoreCase);

        if (isImage)
        {
            result.Add(new KeyValuePair<string, string>(node.GetAttributeValue("href", null), node.InnerText));
        }
    }

    return result;
}

Then, use it like this (I am using a dummy string, just for demo):

// Demo input: two anchors, one with a .png target and one with a .bmp target.
string html = "<a href=\"//site.com/folder/picture.png\" target=\"_blank\">picture.png</a><a href=\"//site.com/folder/picture.bmp\" target=\"_blank\">picture.bmp</a>";
var result = getlinkswithhtmlagilitypack(html);

output:

enter image description here

Or, with a URL, like this:

// An absolute http URL is downloaded by the method before it is parsed.
string url = "http://www.google.com";
var result = getlinkswithhtmlagilitypack(url);

Comments

Popular posts from this blog

javascript - Bootstrap Popover: iOS Safari strange behaviour -

Website Login Issue developed in magento -

Can the constants be defined inside a model file of a framework in PHP? -