Document Cleanup Automation

Tech-Archive recommends: Repair Windows Errors & Optimize Windows Performance

From: Steven Drenker (sdrenker_at_takeout_pacbell.net)
Date: 05/15/04

  • Next message: Howard Kaikow: "Re: macros lost"
    Date: Sat, 15 May 2004 12:04:56 -0700
    
    

    Hi all...I frequently grab articles on the web and like to clean them up
    into a standard format. I wrote the following macro that does a whole lot
    of clean up as defined in the two string arrays vFindText and vReplText
    (They used to be Variant arrays -- the "v" prefix stuck). I am having a
    couple of problems. It still hangs occasionally when there are two or more
    returns. I solved this problem with usergroup help a couple years ago, but
    can't find the solution.

    I want to make apply the "Heading 1" style to the first paragraph and apply
    the "Heading 3" style to any paragraph that fits on one line. I attempted
    this at the end, but with so-so results. Any ideas how to address these two
    problems?

    Steve Drenker

    ' This macro removes extra spaces, spaces before commas, spaces after

    ' open paren and before close paren, spaces before paragraph returns,

    ' spaces after paragraph returns and multiple paragraph returns.

    ' Adapted 2/7/03 from Jonathon West, Word MVP, http://www.multilinker.com

        Const ArrayUBound = 19

        Dim vFindText(ArrayUBound) As String

        Dim vReplText(ArrayUBound) As String

        Dim i As Long

        Dim MyRange As Range

        Dim objPara As Paragraph

    ' NOTE: Put the "^p^p" last after the previous cleanup of paragraph returns

    ' has been completed (i.e., spaces before and after other para returns)

        vFindText(0) = "( "

        vFindText(1) = " )"

        vFindText(2) = " ,"

        vFindText(3) = " ."

        vFindText(4) = " " ' Two spaces

        vFindText(5) = " ^p" ' Space in front of paragraph return

        vFindText(6) = "^p "

        vFindText(7) = "--" ' En-dash

        vFindText(8) = ",,"

        vFindText(9) = "..." ' Ellipsis

        vFindText(10) = ".." ' Double periods (after completing ellipsis
    conversion)

        vFindText(11) = "Ö " ' Space after ellipsis

        vFindText(12) = "``" ' Left double quotes

        vFindText(13) = "`" ' Left single quote

        vFindText(14) = "''"

        vFindText(15) = " ^= "

        vFindText(16) = " ^="

        vFindText(17) = "^= "

        vFindText(18) = "|"

        vFindText(19) = "^p^p"

        

        vReplText(0) = "("

        vReplText(1) = ")"

        vReplText(2) = ","

        vReplText(3) = "."

        vReplText(4) = " "

        vReplText(5) = "^p"

        vReplText(6) = "^p"

        vReplText(7) = "^="

        vReplText(8) = ","

        vReplText(9) = "Ö"

        vReplText(10) = "."

        vReplText(11) = "Ö"

        vReplText(12) = """"

        vReplText(13) = "'"

        vReplText(14) = """"

        vReplText(15) = "|"

        vReplText(16) = "|"

        vReplText(17) = "|"

        vReplText(18) = "^s^=^s"

        vReplText(19) = "^p"

        

    ' Alternative approach to initializing arrays. This gets very hard to read

    ' with more than 4 or 5 elements because you can't match the Find string

    ' to the Replace string.

    ' Dim vFindText As Variant

    ' Dim vReplText As Variant

    ' vFindText = Array("( ", " )", " ,", " .", " ", " ^p", "^p ", "^p^p", "
    ", "'", """")

    ' vReplText = Array("(", ")", ",", ".", " ", "^p", "^p", "^p", " ", "'",
    """")

        

        With ActiveDocument.Content.Find

            .ClearFormatting

            For i = 0 To UBound(vFindText)

                Selection.HomeKey Unit:=wdStory

                

                Debug.Print i & " Find: " & vFindText(i)

                Debug.Print i & " Replace: " & vReplText(i) & vbCrLf

                If i = UBound(vFindText) Then

    ' Find & Replace won't delete the first or last paragraph returns

    ' in a document if they are empty. The lines in this If-Then block

    ' execute after completing all other cleanup except multiple CrLf

    ' It removes multiple returns at the beginning and the end of the

    ' document. This avoids the last cleanup code (^p^p --> ^p) going

    ' into an infinite loop if there is a double return at the end of

    ' the document.

                    Set MyRange = ActiveDocument.Paragraphs(1).Range

                    Do While MyRange.Text = vbCr

                        MyRange.Delete

                        Set MyRange = ActiveDocument.Paragraphs(1).Range

                    Loop

                    

                    Set MyRange = ActiveDocument.Paragraphs.Last.Range

                    Do While MyRange.Text = vbCr

                        MyRange.Delete

                        Set MyRange = ActiveDocument.Paragraphs.Last.Range

                    Loop

                End If

        

    ' Do all the other conversions
                Do While .Execute(FindText:=vFindText(i), _

                                  Forward:=True, _

                                  Format:=True) = True

                    .Execute FindText:=vFindText(i), _

                             Forward:=True, _

                             Format:=True, _

                             ReplaceWith:=vReplText(i), _

                             Replace:=wdReplaceAll

                Loop

            Next i

    ' Convert straight quotes to curly quotes. Can't do this with the Do While /
    Loop

    ' construct above because .Execute will always find both straight AND curly
    quotes.

    ' Therefore need to do the .Execute on a single-pass.

            .Execute FindText:="'", _

                     Forward:=True, _

                     Format:=True, _

                     ReplaceWith:="'", _

                     Replace:=wdReplaceAll

                     

            .Execute FindText:="""", _

                     Forward:=True, _

                     Format:=True, _

                     ReplaceWith:="""", _

                     Replace:=wdReplaceAll

        

        End With

        

    ' Now format the document

        Selection.WholeStory

        Selection.Style = ActiveDocument.Styles("Normal")

        ActiveDocument.Styles("Normal").ParagraphFormat.SpaceBefore = 12

        

        For Each objPara In ActiveDocument.Paragraphs

            If objPara.Range.Characters.Count < 80 Then

                objPara.Style = ActiveDocument.Styles("Heading 3")

            End If

        Next objPara

        

        Selection.HomeKey Unit:=wdStory

        Selection.Style = ActiveDocument.Styles("Heading 1")

        

    ' Selection.Range.Paragraphs(1) = ActiveDocument.Styles("Heading 1")

    End Sub

    Sub CleanUp()

        Selection.Find.ClearFormatting

        Selection.Find.Replacement.ClearFormatting

        With Selection.Find

            .Execute FindText:=" ^p", _

                     Forward:=True, _

                     Format:=True, _

                     ReplaceWith:="^p", _

                     Replace:=wdReplaceAll

                     

            .Execute FindText:="^p ", _

                     Forward:=True, _

                     Format:=True, _

                     ReplaceWith:="^p", _

                     Replace:=wdReplaceAll

                     

            .Execute FindText:="^p^p", _

                     Forward:=True, _

                     Format:=True, _

                     ReplaceWith:="|", _

                     Replace:=wdReplaceAll

            

            .Execute FindText:="^p", _

                     Forward:=True, _

                     Format:=True, _

                     ReplaceWith:=" ", _

                     Replace:=wdReplaceAll

            .Execute FindText:="|", _

                     Forward:=True, _

                     Format:=True, _

                     ReplaceWith:="^p", _

                     Replace:=wdReplaceAll

        End With

        

        Selection.Style = ActiveDocument.Styles("Normal")

        Selection.Font.Reset

        ActiveDocument.Styles("Normal").ParagraphFormat.SpaceBefore = 12

    End Sub

    Sub CleanUpMultipleReturns()

    ' Source:
    http://groups.google.com/groups?hl=en&lr=&ie=UTF-8&oe=UTF-8&threadm=%23clWyR
    wzCHA.2868%40TK2MSFTNGP12&rnum=26&prev=/groups%3Fq%3Dsearch%2Breplace%2Bgrou
    p:microsoft.public.word.vba.general%26start%3D20%26hl%3Den%26lr%3D%26ie%3DUT
    F-8%26oe%3DUTF-8%26group%3Dmicrosoft.public.word.vba.general%26selm%3D%2523c
    lWyRwzCHA.2868%2540TK2MSFTNGP12%26rnum%3D26

    ' This macro strips hard returns from end of every line and restores

    ' a single hard return where multiple returns previously existed.

        Const ArrayUBound = 5

        Dim vFindText(ArrayUBound) As String

        Dim vReplText(ArrayUBound) As String

        Dim i As Long

        Dim MyRange As Range

    ' NOTE: Put the "^p^p" last after the previous cleanup of paragraph returns

    ' has been completed (i.e., spaces before and after other para returns)

        vFindText(0) = " ^p" ' Space in front of paragraph return

        vFindText(1) = "^p " ' Space after paragraph return

        vFindText(2) = "^p^p" ' Convert double paragraph returns to single
    pipe character

        vFindText(3) = "^p" ' Convert all remaining paragraph returns to
    spaces

        vFindText(4) = "||"

        vFindText(5) = "|" ' Convert remaining single pipe characters back
    to single paragraph returns

        

        vReplText(0) = "^p"

        vReplText(1) = "^p"

        vReplText(2) = "|" ' Mark any double paragraph returns with pipe
    character

        vReplText(3) = " "

        vReplText(4) = "|"

        vReplText(5) = "^p"

        

        

    ' Alternative approach to initializing arrays. This gets very hard to read

    ' with more than 4 or 5 elements because you can't match the Find string

    ' to the Replace string.

    ' Dim vFindText As Variant

    ' Dim vReplText As Variant

    ' vFindText = Array("( ", " )", " ,", " .", " ", " ^p", "^p ", "^p^p", "
    ", "'", """")

    ' vReplText = Array("(", ")", ",", ".", " ", "^p", "^p", "^p", " ", "'",
    """")

        

        With ActiveDocument.Content.Find

            .ClearFormatting

            For i = 0 To UBound(vFindText)

                Selection.HomeKey Unit:=wdStory

                

                Debug.Print i & " Find: " & vFindText(i)

                Debug.Print i & " Replace: " & vReplText(i) & vbCrLf

                ' Find & Replace won't delete the first or last paragraph
    returns

                ' in a document if they are empty. This If-Then block executes
    after

                ' spaces have been removed in front of and after paragraph
    returns.

                ' It removes multiple returns at the beginning and the end of
    the

                ' document. This avoids going into an infinite loop if there is
    a

                ' double return at the end of the document.

                If vFindText(i) <> "^p^p" Then

                    Set MyRange = ActiveDocument.Paragraphs(1).Range

                    Do While MyRange.Text = vbCr

                        MyRange.Delete

                        Set MyRange = ActiveDocument.Paragraphs(1).Range

                    Loop

                    

                    Set MyRange = ActiveDocument.Paragraphs.Last.Range

                    Do While MyRange.Text = vbCr

                        MyRange.Delete

                        Set MyRange = ActiveDocument.Paragraphs.Last.Range

                    Loop

                End If

        

                If vFindText(i) <> "^p" Then

                    Do While .Execute(FindText:=vFindText(i), _

                                      Forward:=True, _

                                      Format:=True) = True

                        .Execute FindText:=vFindText(i), _

                                 Forward:=True, _

                                 Format:=True, _

                                 ReplaceWith:=vReplText(i), _

                                 Replace:=wdReplaceAll

                    Loop

                Else

                ' Don't want a Do While / Loop when searching for paragraph
    returns

                ' because it will infinitely loop at last paragraph return
    because

                ' it never gets converted to a space

                    .Execute FindText:=vFindText(i), _

                             Forward:=True, _

                             Format:=True, _

                             ReplaceWith:=vReplText(i), _

                             Replace:=wdReplaceAll

                End If

            Next i

        End With

        

        Selection.WholeStory

        Selection.Style = ActiveDocument.Styles("Normal")

        ActiveDocument.Styles("Normal").ParagraphFormat.SpaceBefore = 12

    End Sub

    Sub SearchReplace()

    ' Detect when Word is trapped with a partial wildcard match

    ' at the very end of a document

    ' Article contributed by Bill Coan

    ' Dim SearchString(4) As String

        

    ' SearchString(0) = "\<\<"

    ' SearchString(1) = "\<[!>]@\<"

    ' SearchString(2) = "\>\>"

    ' SearchString(3) = "\>[!<]@\>"

        Dim SearchFor() As String

        Dim ReplaceWith() As String

         'SearchFor() = (" ^p", " .")

         

         SearchString(1) = "^p"

         

        For i = 0 To 3

            Selection.HomeKey Unit:=wdStory

        

             'call subroutine that clears settings from find dialog

            Call ClearFindAndReplaceParameters

        

            With Selection.Find

                .Text = SearchString(i)

                .Replacement.Text = ""

                .Forward = True

                .Wrap = wdFindStop

                .Format = False

                .MatchCase = False

                .MatchWholeWord = False

                .MatchWildcards = True

                .MatchSoundsLike = False

                .MatchAllWordForms = v

            End With

        

            Do While Selection.Find.Execute()

                'here's where we detect the fact that

                'Word got lost at the end of the doc

                If Selection.End = 0 Then Exit Do

               'insert code here to act on found text

                Selection.Collapse wdCollapseEnd

            Loop

        

        Next i

    End Sub

    Sub ClearFindAndReplaceParameters()

    ' Clear settings from Find and Replace dialog to prevent

    ' unexpected results from future Find or Replace operations

    ' Article contributed by Bill Coan

    ' To prevent the user from having to change the settings in the

    ' Find and Replace dialog after running your macros, make sure you

    ' call the following procedure after doing any Find and Replace

    ' operations in VBA.

        With Selection.Find

           .ClearFormatting

           .Replacement.ClearFormatting

           .Text = ""

           .Replacement.Text = ""

           .Forward = True

           .Wrap = wdFindStop

           .Format = False

           .MatchCase = False

           .MatchWholeWord = False

           .MatchWildcards = False

           .MatchSoundsLike = False

           .MatchAllWordForms = False

        End With

    End Sub

    Sub Replace_line_breaks_with_para_returns()

        Selection.Find.ClearFormatting

        Selection.Find.Replacement.ClearFormatting

        With Selection.Find

            .Text = "^l"

            .Replacement.Text = "^p"

            .Forward = True

            .Wrap = wdFindStop

            .Format = False

            .MatchCase = False

            .MatchWholeWord = False

            .MatchWildcards = False

            .MatchSoundsLike = False

            .MatchAllWordForms = False

        End With

        Selection.Find.Execute Replace:=wdReplaceAll

    End Sub


  • Next message: Howard Kaikow: "Re: macros lost"

    Relevant Pages

    • Project Error
      ... Private Declare Sub Sleep Lib "Kernel32" ... Dim strDataSrc As String ...
      (microsoft.public.vb.bugs)
    • Re: Is there a way to prevent a RichTextBox from scrolling?
      ... Private _isRegex As Boolean ... Public Sub New(ByVal thispattern As String, ... Dim entry As tDict ...
      (microsoft.public.dotnet.framework.windowsforms.controls)
    • Excel Listing tool using VB
      ... Sub ListFiles2() ... Dim directories() As String, CurrentDirectory As String ... Dim dirtopaste, dirok ...
      (microsoft.public.vb.general.discussion)
    • Form Error
      ... SMSDS_CallerID As String ... Private Declare Sub Sleep Lib "kernel32" ... Dim ComString As String ... Dim AppPath As String, FreeFileNo% ...
      (microsoft.public.vb.bugs)
    • Re: Encrypt/hide Password
      ... Public Sub New(ByVal strCryptoName As String) ... ' instantiated crypto class. ... Dim fsKey As New FileStream(strSaveToPath, FileMode.OpenOrCreate, _ ...
      (microsoft.public.scripting.wsh)