Re: Get regular expression
- From: "Bruce Wood" <brucewood@xxxxxxxxxx>
- Date: 23 Jun 2006 13:16:58 -0700
Just playing around, I put together this little mock-up program. It's
300 lines long, but it takes the entries you posted and formats them
both as a tree and as a list of keyed entries. Hope this helps.
using System;
using System.Collections;
namespace Namespace
{
class Program
{
// Sample index lines used as input by Main. Each line is descriptive text
// optionally followed by a trailing citation code such as "68.23" or "37.33/2"
// (see SplitCitation). Literals that the posting had wrapped across two lines
// are rejoined with a single space; trailing spaces inside literals are kept.
private static readonly string[] TextEntries =
{
    "ABLATION ENDOMETRIAL (HYSTEROSCOPIC) 68.23",
    "ABLATION HEART (CONDUCTION DEFECT) 37.33/2",
    "ABLATION HEART (CONDUCTION DEFECT) WITH CATHETER 37.34/2",
    "ABLATION INNER EAR (CRYOSURGERY) (ULTRASOUND) 20.79/4",
    "ABLATION INNER EAR (CRYOSURGERY) (ULTRASOUND) BY INJECTION 20.72",
    "ABLATION LESION HEART BY PERIPHERALLY INSERTED CATHETER 37.34",
    "ABLATION LESION HEART ENDOVASCULAR APPROACH 37.34",
    "ABLATION LESION HEART MAZE PROCEDURE (COX-MAZE) ENDOVASCULAR APPROACH 37.34",
    "ABLATION LESION HEART MAZE PROCEDURE (COX-MAZE) OPEN (TRANS-THORACIC) APPROACH 37.33",
    "ABLATION LESION HEART MAZE PROCEDURE (COX-MAZE) TRANS-THORACIC APPROACH 37.33",
    "ABLATION PITUITARY 7.69",
    "ABLATION PITUITARY BY COBALT-60 92.32",
    "ABLATION PITUITARY BY IMPLANTATION (STRONTIUM-YTTRIUM) (Y) NEC 92.39 ",
    "ABLATION PITUITARY BY PROTON BEAM (BRAGG PEAK) 92.33 ",
    "ABLATION PROSTATE (ANAT = 59.02) BY LASER, TRANSURETHRAL 60.21 ",
    "ABLATION PROSTATE (ANAT = 59.02) BY RADIOFREQUENCY THERMOTHERAPY 60.97 ",
    "ABLATION PROSTATE (ANAT = 59.02) BY TRANSURETHRAL NEEDLE ABLATION (TUNA) 60.97 ",
    "ABLATION PROSTATE (ANAT = 59.02) PERINEAL BY CRYOABLATION 60.62 ",
    "ABLATION PROSTATE (ANAT = 59.02) PERINEAL BY RADICAL CRYOSURGICAL ABLATION (RCSA) 60.62 ",
    "ABLATION PROSTATE (ANAT = 59.02) TRANSURETHRAL BY LASER 60.21 ",
    "ABLATION PROSTATE (ANAT = 59.02) TRANSURETHRAL CRYOABLATION 60.29 ",
    "ABLATION PROSTATE (ANAT = 59.02) TRANSURETHRAL RADICAL CRYOSURGICAL ABLATION (RCSA) 60.29 ",
    "ABLATION TISSUE HEART - SEE ABLATION, LESION, HEART 0 ",
    "ABLATION VESICLE NECK (ANAT = 60.02) 57.91 "
};

// Words that must not stand alone as a tree node; FoldInExcludedWords merges
// such a node's text into each of its children.
private static readonly string[] ExclusionList = { "BY", "WITH" };
/// <summary>
/// One node of the entry tree: a text fragment, an optional citation,
/// a numeric id and the list of child nodes hanging below it.
/// </summary>
public class Entry
{
    private string _text;
    private string _citation;
    private long _uid;
    private ArrayList _child;

    /// <summary>Creates a node with the given text, citation and child list.</summary>
    /// <param name="text">Text fragment held by this node.</param>
    /// <param name="citation">Citation string; the shorter overloads pass "" when none is given.</param>
    /// <param name="child">List stored as this node's children (kept by reference).</param>
    public Entry(string text, string citation, ArrayList child)
    {
        _text = text;
        _citation = citation;
        _child = child;
        _uid = 0; // ids start at 0 until a caller sets Uid
    }

    /// <summary>Creates a node with text and citation and a new, empty child list.</summary>
    public Entry(string text, string citation)
        : this(text, citation, new ArrayList())
    {
    }

    /// <summary>Creates a node with text only: empty citation, new empty child list.</summary>
    public Entry(string text)
        : this(text, "", new ArrayList())
    {
    }

    /// <summary>Numeric id of this node.</summary>
    public long Uid
    {
        get { return _uid; }
        set { _uid = value; }
    }

    /// <summary>Text fragment held by this node.</summary>
    public string Text
    {
        get { return _text; }
        set { _text = value; }
    }

    /// <summary>Citation attached to this node.</summary>
    public string Citation
    {
        get { return _citation; }
        set { _citation = value; }
    }

    /// <summary>Child nodes; the list itself is mutable but cannot be replaced.</summary>
    public ArrayList Child
    {
        get { return _child; }
    }
}
// Program entry point: builds the entry tree from TextEntries, prints it as an
// indented tree, assigns ids, prints it again as flat keyed rows, then waits
// for a line of input before exiting.
static void Main(string[] args)
{
    ArrayList roots = new ArrayList();

    // Split every sample line into text + citation and merge it into the tree.
    for (int n = 0; n < TextEntries.Length; n++)
    {
        string text;
        string citation;
        SplitCitation(TextEntries[n], out text, out citation);
        AddToList(text, citation, roots);
    }

    // Merge stand-alone excluded words into their children before printing.
    FoldInExcludedWords(roots);
    PrintList(roots, 0);

    // Ids are handed out starting at 1; 0 is passed below as the parent id of the roots.
    long nextUid = 1;
    AssignUids(roots, ref nextUid);
    PrintAsHashtable(roots, 0);

    Console.ReadLine();
}
/// <summary>
/// Splits an index line into its descriptive text and its trailing citation.
/// The citation is recognised by scanning backwards from the end of the line
/// for: digits, an optional '/', digits, an optional '.', digits - which covers
/// forms such as "68.23", "37.33/2" and "0". It only counts as a citation when
/// it is preceded by whitespace.
/// </summary>
/// <param name="line">The full input line; trailing whitespace is ignored.</param>
/// <param name="text">Receives the line without the citation and without the
/// whitespace before it; the whole trimmed line when no citation is found.</param>
/// <param name="citation">Receives the citation, or "" when none is found.</param>
private static void SplitCitation(string line, out string text, out string citation)
{
    // Could use Regex here, but it's probably faster to just do it the
    // brain-dead way.

    // Index of the last non-whitespace character (-1 if there is none).
    int end = line.Length - 1;
    while (end >= 0 && Char.IsWhiteSpace(line[end]))
    {
        end -= 1;
    }

    int i = end;
    int len = 0;
    while (i >= 0 && Char.IsDigit(line[i]))
    {
        i -= 1;
        len += 1;
    }
    if (i >= 0 && line[i] == '/')
    {
        i -= 1;
        len += 1;
    }
    while (i >= 0 && Char.IsDigit(line[i]))
    {
        i -= 1;
        len += 1;
    }
    if (i >= 0 && line[i] == '.')
    {
        i -= 1;
        len += 1;
    }
    while (i >= 0 && Char.IsDigit(line[i]))
    {
        i -= 1;
        len += 1;
    }

    if (i >= 0 && Char.IsWhiteSpace(line[i]))
    {
        citation = line.Substring(i + 1, len);
    }
    else
    {
        citation = "";
        // The scanned characters were not a citation (e.g. the "60" of
        // "COBALT-60"), so rewind; otherwise they would be cut off the text.
        i = end;
    }

    // Drop the whitespace that separated the text from the citation.
    while (i >= 0 && Char.IsWhiteSpace(line[i]))
    {
        i -= 1;
    }

    if (i >= 0)
    {
        text = line.Substring(0, i + 1);
    }
    else
    {
        text = "";
    }
}
/// <summary>
/// Returns the length of the longest common prefix of the two strings that
/// ends on a word boundary (end of string or whitespace in both strings).
/// When the raw character-wise common prefix stops in the middle of a word,
/// the result is backed up to the preceding whitespace in <paramref name="text1"/>.
/// </summary>
/// <param name="text1">First string.</param>
/// <param name="text2">Second string.</param>
/// <returns>The word-aligned common prefix length; 0 when the strings share
/// no complete leading word. Never negative.</returns>
private static int InitialEqualStringLength(string text1, string text2)
{
    // Raw character-wise common prefix.
    int i = 0;
    while (i < text1.Length && i < text2.Length && text1[i] == text2[i])
    {
        i++;
    }

    bool end1 = i >= text1.Length;
    bool end2 = i >= text2.Length;

    // The prefix already ends on a word boundary in both strings.
    if (end1 && end2)
    {
        return i;
    }
    if (end1 && Char.IsWhiteSpace(text2[i]))
    {
        return i;
    }
    if (end2 && Char.IsWhiteSpace(text1[i]))
    {
        return i;
    }
    if (!end1 && !end2 && Char.IsWhiteSpace(text1[i]) && Char.IsWhiteSpace(text2[i]))
    {
        return i;
    }

    // Mismatch inside a word: back up to the whitespace before that word.
    int boundary = i - 1;
    while (boundary > 0 && !Char.IsWhiteSpace(text1[boundary]))
    {
        boundary -= 1;
    }

    // When the very first characters differ (or one string is empty) the
    // back-up step lands on -1; report that as "no common prefix" (0).
    return Math.Max(boundary, 0);
}
/// <summary>
/// Tells whether <paramref name="text"/> is exactly one of the words in
/// ExclusionList (case-sensitive, whole-string comparison).
/// </summary>
/// <param name="text">Candidate node text.</param>
/// <returns>true when the text equals an exclusion-list word; otherwise false.</returns>
private static bool ExcludedWord(string text)
{
    return Array.IndexOf(ExclusionList, text) >= 0;
}
/// <summary>
/// Merges one line of text (with its citation) into a list of sibling entries,
/// sharing leading words with an existing entry where possible. Only the first
/// entry that shares a word-aligned prefix with the line is considered.
/// </summary>
/// <param name="line">Text to insert, relative to this level of the tree.</param>
/// <param name="citation">Citation belonging to the line.</param>
/// <param name="list">Sibling entries at this level; modified in place.</param>
public static void AddToList(string line, string citation, ArrayList list)
{
    for (int index = 0; index < list.Count; index++)
    {
        Entry existing = (Entry)list[index];
        int shared = InitialEqualStringLength(line, existing.Text);
        if (shared <= 0)
        {
            continue; // nothing in common with this sibling
        }

        // Same text: just fill in a missing citation.
        if (line == existing.Text)
        {
            if (existing.Citation.Length == 0)
            {
                existing.Citation = citation;
            }
            // A different, non-empty citation on the existing entry is a
            // conflict; it is silently ignored and the existing one is kept.
            return;
        }

        // The existing entry is a prefix of the line: descend into its children.
        if (shared == existing.Text.Length)
        {
            AddToList(line.Substring(shared).Trim(), citation, existing.Child);
            return;
        }

        // The line is a prefix of the existing entry: the line becomes the
        // parent and the existing entry (with its remaining text) its child.
        if (shared == line.Length)
        {
            Entry parent = new Entry(line.Substring(0, shared).Trim(), citation);
            existing.Text = existing.Text.Substring(shared).Trim();
            parent.Child.Add(existing);
            list[index] = parent;
            return;
        }

        // Partial overlap: introduce a citation-less node for the shared words
        // and hang both remainders below it, existing entry first.
        Entry common = new Entry(line.Substring(0, shared).Trim());
        Entry added = new Entry(line.Substring(shared).Trim(), citation);
        existing.Text = existing.Text.Substring(shared).Trim();
        common.Child.Add(existing);
        common.Child.Add(added);
        list[index] = common;
        return;
    }

    // No sibling shares a leading word: append as a new entry.
    list.Add(new Entry(line, citation));
}
/// <summary>
/// Removes every node whose text is an excluded word (see ExcludedWord) and
/// replaces it, in place, with its children, each prefixed with the removed
/// word and a space. Children are processed before their parent.
/// </summary>
/// <param name="list">Sibling entries at this level; modified in place.</param>
public static void FoldInExcludedWords(ArrayList list)
{
    for (int i = 0; i < list.Count; i++)
    {
        Entry e = (Entry)list[i];
        FoldInExcludedWords(e.Child);

        // Fold only when there is something to fold into. An excluded word
        // without children is left where it is: removing it would drop the
        // entry (and its citation), and because nothing would be inserted in
        // its place the loop's i++ would then skip the next sibling.
        if (ExcludedWord(e.Text) && e.Child.Count > 0)
        {
            // NOTE(review): a citation on the removed node itself is not
            // carried over to its children - confirm that this is acceptable.
            list.RemoveAt(i);

            // Insert back to front so the children keep their original order
            // starting at position i.
            for (int j = e.Child.Count - 1; j >= 0; j--)
            {
                Entry f = (Entry)e.Child[j];
                f.Text = e.Text + " " + f.Text;
                list.Insert(i, f);
            }
        }
    }
}
/// <summary>
/// Writes the entries to the console as an indented tree, one line per entry
/// in the form "text: citation", with children indented 4 columns further.
/// </summary>
/// <param name="list">Entries to print at this level.</param>
/// <param name="indent">Field width used for the leading blank on each line.</param>
public static void PrintList(ArrayList list, int indent)
{
    // Composite format "{0,<indent>}{1}: {2}": argument 0 is a single blank
    // padded to the requested width, which produces the indentation.
    string lineFormat = "{0," + indent + "}{1}: {2}";

    for (int k = 0; k < list.Count; k++)
    {
        Entry node = (Entry)list[k];
        Console.WriteLine(lineFormat, " ", node.Text, node.Citation);
        PrintList(node.Child, indent + 4);
    }
}
/// <summary>
/// Numbers all entries depth-first: each entry receives the current counter
/// value before its children are numbered.
/// </summary>
/// <param name="list">Entries to number at this level.</param>
/// <param name="nextUid">Next id to hand out; advanced by one per entry.</param>
public static void AssignUids(ArrayList list, ref long nextUid)
{
    for (int k = 0; k < list.Count; k++)
    {
        Entry node = (Entry)list[k];
        node.Uid = nextUid++;
        AssignUids(node.Child, ref nextUid);
    }
}
/// <summary>
/// Writes the entries to the console as flat keyed rows, one per entry, each
/// carrying its own id and the id of its parent, followed by its descendants.
/// </summary>
/// <param name="list">Entries to print at this level.</param>
/// <param name="parentUid">Id printed as the parent of every entry in <paramref name="list"/>.</param>
public static void PrintAsHashtable(ArrayList list, long parentUid)
{
    foreach (Entry e in list)
    {
        // The format string must stay on one line: a line break inside a
        // regular string literal does not compile.
        Console.WriteLine("UID:{0}, Parent UID:{1}, Text:{2}, Citation:{3}",
            e.Uid, parentUid, e.Text, e.Citation);
        PrintAsHashtable(e.Child, e.Uid);
    }
}
}
}
.
- References:
- Get regular expression
- From: Mike
- Re: Get regular expression
- From: Xicheng Jia
- Re: Get regular expression
- From: Mike
- Re: Get regular expression
- From: Kevin Spencer
- Re: Get regular expression
- From: Mike
- Get regular expression
- Prev by Date: Re: Manually creating a form
- Next by Date: Re: Help With Nesting Classes In Library
- Previous by thread: Re: Get regular expression
- Next by thread: Re: Get regular expression
- Index(es):
Relevant Pages
|