-----------------------------------------------------------------------------------------------------------------------
--we need a way of comparing file date and size between two web servers. the comparison doesn't need to include the
--content of the file, just the presence of the file, date and size. the approach here is to let ultracompare do the 
--comparison and write the results for each root directory out to a text file. we'll consolidate the text files and 
--import them into sql server, where we can manipulate and report on it however we want.

--references:
--ultracompare command line options: http://www.ultraedit.com/help/article/command-line-options-49.html
-----------------------------------------------------------------------------------------------------------------------

use tempdb;
go

--staging table for the incoming comparison text file. each row holds one raw line of the report
--(RawLine), an identity (ID) used later to find "the next line", and the two parent-path columns
--that are populated by the parsing steps further down. drop any previous copy first so the script
--can be rerun. column order is kept as-is because the bulk load format file may depend on it.
IF OBJECT_ID('dbo.IncomingCompare', 'u') IS NOT NULL
BEGIN
  DROP TABLE dbo.IncomingCompare;
END;

CREATE TABLE dbo.IncomingCompare
(
  RawLine      Varchar(8000),
  ID           Integer NOT NULL IDENTITY (1, 1),
  ParentPath55 Varchar(8000),
  ParentPath57 Varchar(8000),
  CONSTRAINT IncomingCompare_PK PRIMARY KEY (ID)
);

--reporting table for the site differences: one row per compared entry, with the parent path,
--file name, size and date as seen on each of the two servers (55 and 57), plus a file type
--classification that is filled in after the load. dropped and recreated on every run.
IF OBJECT_ID('dbo.SiteDiffs', 'u') IS NOT NULL
BEGIN
  DROP TABLE dbo.SiteDiffs;
END;

CREATE TABLE dbo.SiteDiffs
(
  Path55     Varchar(255),
  Path57     Varchar(255),
  FileType   Varchar(32),
  FileName55 Varchar(255),
  FileSize55 Bigint,
  FileDate55 Datetime,
  FileName57 Varchar(255),
  FileSize57 Bigint,
  FileDate57 Datetime
);

--lookup table of file types. Match is a LIKE pattern that is compared against a file name and
--Description is the label written to SiteDiffs.FileType. dropped and recreated on every run.
IF OBJECT_ID('dbo.FileTypes', 'u') IS NOT NULL
BEGIN
  DROP TABLE dbo.FileTypes;
END;

CREATE TABLE dbo.FileTypes
(
  Match       Varchar(16) NOT NULL,
  Description Varchar(64) NOT NULL
);

--seed the file type lookup. each Match value is a LIKE pattern applied to the file name and each
--Description is the label reported for it. exact names (web.config, Thumbs.db) sit alongside
--extension patterns, so a single file name can satisfy more than one pattern
--(e.g. 'page.aspx.vb' matches both '%.aspx.vb' and '%.vb').
--every pattern is listed exactly once: the table has no key to reject duplicates.
INSERT INTO dbo.FileTypes(Match, Description)
  VALUES('..%', 'Directory'),
        ('%.aspx', '.NET Page'),
        ('%.ascx', '.NET Web Control'),
        ('%.asmx', '.NET Web Service'),
        ('%.aspx.vb', '.NET VB Page'),
        ('%.aspx.cs', '.NET C# Page'),
        ('%.vb', '.NET VB Class'),
        ('%.cs', '.NET C# Class'),
        ('%.sln', '.NET Solution'),
        ('%.suo', 'Visual Studio User Options'),
        ('%.asp', 'Classic ASP Page'),
        ('%.compiled', '.NET Precompiled Site'),
        ('%.vbhtml', '.NET Razor View'),
        ('%.pdb', '.NET Debug File'),
        ('web.config', 'ASP.NET Site Configuration'),
        ('app.config', 'ASP.NET Application Configuration'),
        ('machine.config', 'ASP.NET Machine Configuration'),
        ('%.config', 'Miscellaneous Configuration'),
        ('%.cfm', 'ColdFusion Markup'),
        ('%.cfc', 'ColdFusion Component'),
        ('%.cgi', 'CGI Script'),
        ('%.inc', 'ColdFusion Include'),
        ('%.log', 'ColdFusion Log'),
        ('%.properties', 'Properties (Java?) File'),
        ('%.bat', 'Batch File'),
        ('%.cmd', 'Batch File'),
        ('%.exe', 'Executable'),
        ('%.dll', 'Dynamic Link Library'),
        ('%.xml', 'XML File'),
        ('%.txt', 'Text File'),
        ('%.csv', 'Text File'),
        ('%.sql', 'SQL Script'),
        ('Thumbs.db', 'System File'),
        ('%.bak', 'Backup File'),
        ('%.jpg', 'Image'),
        ('%.jpeg', 'Image'),
        ('%.png', 'Image'),
        ('%.gif', 'Image'),
        ('%.ico', 'Icon'),
        ('%.htm%', 'HTML'),
        ('%.css', 'CSS'),
        ('%.js', 'Javascript'),
        ('%.php', 'PHP File'),
        ('%.mp3', 'MP3 Audio'),
        ('%.wav', 'Wave Audio'),
        ('%.pdn', 'Paint.NET Native Image'),
        ('%.doc', 'Word Document'),
        ('%.docx', 'Word Document'),
        ('%.xls', 'Excel File'),
        ('%.xlsx', 'Excel File'),
        ('%.ppt', 'PowerPoint Presentation'),
        ('%.pptx', 'PowerPoint Presentation'),
        ('%.mdb', 'Access Database'),
        ('%.zip', 'Zip File'),
        ('%.gz', 'GNU Zip File'),
        ('%.adp', 'Access Data Project'),
        ('%.pdf', 'PDF File'),
        ('%.swf', 'Shockwave File'),
        ('%.apf', 'Adobe Profile File'),
        ('%.tmp', 'Temp File'),
        ('%.old', 'Old (Backup) File');

-------------------------------------------------------------------------------
--process for importing, parsing and publishing the differences
-------------------------------------------------------------------------------
--empty the staging table, then bulk load the consolidated comparison text file into it.
--the format file decides how the text maps onto the table's columns (its contents are not part
--of this script). MAXERRORS = 0 makes the load fail on the first bad row rather than skipping it.
TRUNCATE TABLE dbo.IncomingCompare;

BULK INSERT dbo.IncomingCompare
  FROM 'D:\temp\web_compare.txt'
  WITH (
    FORMATFILE = 'D:\temp\LoadPathComparison.xml',
    DATAFILETYPE = 'char',
    FIRSTROW = 1,
    MAXERRORS = 0
  );

--each comparison in the file starts with two header lines: the first ('[ 1 ]') names the site on
--55 and the second names the matching site on 57. the goal of the next few statements is to get
--both headers onto every row of the comparison they belong to.

--step 1: on every 55 header line, copy the raw line into ParentPath55
UPDATE ic
  SET ParentPath55 = ic.RawLine
  FROM dbo.IncomingCompare AS ic
  WHERE LEFT(ic.RawLine, 5) = '[ 1 ]';

--the row after each 55 header is the corresponding site on 57, so copy that next row's raw line
--into ParentPath57 of the 55 header row. "next row" means ID + 1, which assumes the identity
--values follow the line order of the loaded file with no gaps.
UPDATE hdr55
  SET ParentPath57 = hdr57.RawLine
  FROM dbo.IncomingCompare AS hdr55
    INNER JOIN dbo.IncomingCompare AS hdr57
      ON hdr57.ID = hdr55.ID + 1
  WHERE hdr55.ParentPath55 IS NOT NULL;

--autofill both parent path columns down, so every row carries the most recent 55 and 57 header
--at or before it (by ID). rows that appear before the first header get an empty string.
--
--this replaces a "quirky update" (SET @var = col = expr). that technique depends on the engine
--assigning variables row by row in clustered index order, which is not guaranteed behavior for an
--UPDATE that touches more than one row. the lookup below is deterministic: for each row and each
--column independently, it takes the nearest preceding row with a non-NULL value. the subqueries
--read the values as they were before this statement started, so only real header rows are seen.
UPDATE ic
  SET ParentPath55 = COALESCE(h55.ParentPath55, ''),
      ParentPath57 = COALESCE(h57.ParentPath57, '')
  FROM dbo.IncomingCompare AS ic
    OUTER APPLY (SELECT TOP (1) p.ParentPath55
                   FROM dbo.IncomingCompare AS p
                   WHERE p.ID <= ic.ID
                     AND p.ParentPath55 IS NOT NULL
                   ORDER BY p.ID DESC) AS h55
    OUTER APPLY (SELECT TOP (1) p.ParentPath57
                   FROM dbo.IncomingCompare AS p
                   WHERE p.ID <= ic.ID
                     AND p.ParentPath57 IS NOT NULL
                   ORDER BY p.ID DESC) AS h57;

--every remaining row now carries its parent paths, so the non-data lines can go: blank lines,
--the two header lines of each comparison, the dashed separator lines and the column heading line.
DELETE ic
  FROM dbo.IncomingCompare AS ic
  WHERE ic.RawLine IS NULL
     OR ic.RawLine = ''
     OR LEFT(ic.RawLine, 5) IN ('[ 1 ]', '[ 2 ]')
     OR ic.RawLine LIKE '-----%'
     OR ic.RawLine LIKE '| Name%';

--clear out our reporting table
TRUNCATE TABLE dbo.SiteDiffs;

--clean up our incoming rows and publish them to the reporting table.
--note: the INSERT column list uses the exact column names of dbo.SiteDiffs (FileName55/FileName57)
--so the statement also works when the database collation is case sensitive.
WITH cteSplit AS (
  --split each raw line on '|' and use a crosstab to turn items 2-7 into columns.
  --item 1 is not used (presumably the empty text before the leading '|' - confirm against the file).
  SELECT ic.ParentPath55, ic.ParentPath57, ic.RawLine,
      MAX(CASE WHEN s.ItemNumber = 2 THEN s.Item END) FileName55,
      MAX(CASE WHEN s.ItemNumber = 3 THEN s.Item END) FileSize55,
      MAX(CASE WHEN s.ItemNumber = 4 THEN s.Item END) FileDate55,
      MAX(CASE WHEN s.ItemNumber = 5 THEN s.Item END) FileName57,
      MAX(CASE WHEN s.ItemNumber = 6 THEN s.Item END) FileSize57,
      MAX(CASE WHEN s.ItemNumber = 7 THEN s.Item END) FileDate57
    FROM dbo.IncomingCompare ic
      CROSS APPLY util.dbo.DelimitedSplit8K(ic.RawLine, '|') s
    --grouping on the raw line (not ID) means identical lines under the same parents collapse to one row
    GROUP BY ic.ParentPath55, ic.ParentPath57, ic.RawLine
),
cteTrimmed AS (
  --everything we brought in has lots of padding on both ends, so we'll eliminate it
  SELECT Path55 = LTRIM(RTRIM(ParentPath55)),
      Path57 = LTRIM(RTRIM(ParentPath57)),
      FileName55 = LTRIM(RTRIM(FileName55)),
      FileSize55 = LTRIM(RTRIM(FileSize55)),
      FileDate55 = LTRIM(RTRIM(FileDate55)),
      FileName57 = LTRIM(RTRIM(FileName57)),
      FileSize57 = LTRIM(RTRIM(FileSize57)),
      FileDate57 = LTRIM(RTRIM(FileDate57))
    FROM cteSplit
),
cteCleaned AS (
  --clean our results and explicitly convert our values:
  --  * strip the '[ 1 ]' / '[ 2 ]' markers from the parent paths
  --  * remove thousands separators from the sizes before converting to bigint
  --  * treat the word "none" in the size and date columns as NULL, as that's what ultracompare
  --    writes when the matching file or directory isn't on the other server
  --the date conversion uses the session's language/dateformat settings.
  SELECT Path55 = LTRIM(REPLACE(t.Path55, '[ 1 ]', '')),
      Path57 = LTRIM(REPLACE(t.Path57, '[ 2 ]', '')),
      t.FileName55,
      FileSize55 = CONVERT(Bigint, NULLIF(REPLACE(t.FileSize55, ',', ''), 'none')),
      FileDate55 = CONVERT(Datetime, NULLIF(t.FileDate55, 'none')),
      t.FileName57,
      FileSize57 = CONVERT(Bigint, NULLIF(REPLACE(t.FileSize57, ',', ''), 'none')),
      FileDate57 = CONVERT(Datetime, NULLIF(t.FileDate57, 'none'))
    FROM cteTrimmed t
)
INSERT INTO dbo.SiteDiffs(Path55, Path57, FileName55, FileSize55, FileDate55, FileName57, FileSize57, FileDate57)
  SELECT c.Path55, c.Path57, c.FileName55, c.FileSize55, c.FileDate55, c.FileName57, c.FileSize57, c.FileDate57
    FROM cteCleaned c;

--pull the datestamp info off the parent paths by keeping only the text before the first space.
--intentionally updates every row. a path with no space is left untouched (previously it was
--blanked, because CHARINDEX returns 0 when nothing is found), and the kept text no longer
--includes the trailing space itself.
--NOTE(review): assumes the datestamp follows the first space; a path that itself contains a
--space would still be cut short - confirm against the report format.
UPDATE sd
  SET Path55 = CASE WHEN CHARINDEX(' ', sd.Path55) > 0
                    THEN LEFT(sd.Path55, CHARINDEX(' ', sd.Path55) - 1)
                    ELSE sd.Path55
               END,
      Path57 = CASE WHEN CHARINDEX(' ', sd.Path57) > 0
                    THEN LEFT(sd.Path57, CHARINDEX(' ', sd.Path57) - 1)
                    ELSE sd.Path57
               END
  FROM dbo.SiteDiffs AS sd;

--set the file type for each row using the filename on 55.
--a file name can match several patterns (e.g. 'page.aspx.vb' matches '%.aspx.vb' and '%.vb';
--'web.config' matches 'web.config' and '%.config'). an UPDATE ... FROM that offers more than one
--value per target row picks one arbitrarily, so choose explicitly: the longest pattern wins, as it
--is the most specific, with the pattern text and description as tie-breakers to keep it repeatable.
--rows whose name matches no pattern are not touched.
UPDATE sd
  SET FileType = best.Description
  FROM dbo.SiteDiffs AS sd
    CROSS APPLY (SELECT TOP (1) ft.Description
                   FROM dbo.FileTypes AS ft
                   WHERE sd.FileName55 LIKE ft.Match
                   ORDER BY LEN(ft.Match) DESC, ft.Match, ft.Description) AS best;

--where the file type isn't set, apply the same rules using the filename on 57.
--as a name can match several patterns, pick one explicitly instead of letting UPDATE ... FROM
--choose arbitrarily: the longest (most specific) pattern wins, with the pattern text and
--description as tie-breakers to keep the result repeatable.
UPDATE sd
  SET FileType = best.Description
  FROM dbo.SiteDiffs AS sd
    CROSS APPLY (SELECT TOP (1) ft.Description
                   FROM dbo.FileTypes AS ft
                   WHERE sd.FileName57 LIKE ft.Match
                   ORDER BY LEN(ft.Match) DESC, ft.Match, ft.Description) AS best
  WHERE sd.FileType IS NULL;

--anything that matched no pattern on either server's file name is labelled as uncategorized
UPDATE sd
  SET FileType = 'Uncategorized'
  FROM dbo.SiteDiffs AS sd
  WHERE sd.FileType IS NULL;

--all done: preview the first 500 published rows, ordered by the 55 path and file name
SELECT TOP (500)
    sd.Path55,
    sd.Path57,
    sd.FileType,
    sd.FileName55,
    sd.FileSize55,
    sd.FileDate55,
    sd.FileName57,
    sd.FileSize57,
    sd.FileDate57
  FROM dbo.SiteDiffs AS sd
  ORDER BY sd.Path55, sd.FileName55;
