how to filter monthly data from a yearly data file ?

  • Client send a YEARLY Data file every month and I want to take that YEARLY file and filter the date for the current month i.e. lets says client sends me today January month 2016 a file that has worth all of 2015 and I want only December data how do I filter it ?

  • You would add a WHERE clause.

    Yes, I know that this reply is probably too vague for your purposes. But so is your questions. Give us more details, and we can give you better help.


    Hugo Kornelis, SQL Server/Data Platform MVP (2006-2016)
    Visit my SQL Server blog: https://sqlserverfast.com/blog/
    SQL Server Execution Plan Reference: https://sqlserverfast.com/epr/

  • Here is a quick example to help you get started.

    😎

    Note: do not make the mistake of using functions on the date column in the where clause, forces the server to do a full scan instead of seek.

    USE tempdb;

    GO

    SET NOCOUNT ON;

    -- SAMPLE DATA SET

    --/*

    IF OBJECT_ID(N'dbo.TBL_TEST_YEARLY_DATA') IS NOT NULL DROP TABLE dbo.TBL_TEST_YEARLY_DATA;

    CREATE TABLE dbo.TBL_TEST_YEARLY_DATA

    (

    TYD_ID INT IDENTITY(1,1) NOT NULL CONSTRAINT PK_DBO_TBL_TEST_YEARLY_DATA_TYD_ID PRIMARY KEY CLUSTERED

    ,TYD_DATE DATE NOT NULL

    ,TYD_VALUE INT NOT NULL CONSTRAINT DFLT_DBO_TBL_TEST_YEARLY_DATA_TYD_VALUE DEFAULT ((ABS(CHECKSUM(NEWID())) % 10000))

    );

    -- SAMPLE INDEX

    CREATE NONCLUSTERED INDEX NCLIDX_DBO_TBL_TEST_YEARLY_DATA_TYD_DATE ON dbo.TBL_TEST_YEARLY_DATA (TYD_DATE ASC) INCLUDE ( TYD_ID , TYD_VALUE );

    DECLARE @START_DATE DATE = CONVERT(DATE,'20150101',112);

    DECLARE @END_DATE DATE = CONVERT(DATE,'20161231',112);

    DECLARE @INTERVAL INT = 1;

    ;WITH T(N) AS (SELECT N FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) AS X(N))

    , NUMS(N) AS (SELECT TOP((DATEDIFF(DAY,@START_DATE,@END_DATE) / @INTERVAL ) +1) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS N

    FROM T T1,T T2,T T3,T T4,T T5,T T6)

    INSERT INTO dbo.TBL_TEST_YEARLY_DATA(TYD_DATE)

    SELECT

    DATEADD(DAY,(NM.N * @INTERVAL),@START_DATE)

    FROM NUMS NM ;

    -- */

    -- HARD CODED VALUES

    SELECT

    TYD.TYD_ID

    ,TYD.TYD_DATE

    ,TYD.TYD_VALUE

    FROM dbo.TBL_TEST_YEARLY_DATA TYD

    WHERE TYD.TYD_DATE >= CONVERT(DATE,'20160101',112)

    AND TYD.TYD_DATE <= CONVERT(DATE,'20160110',112);

    -- CTE RETURNING THE BEGINNING OF CURRENT MONTH AND CURRENT DATE

    ;WITH CURRENT_DATES AS

    (

    SELECT

    DATEFROMPARTS(YEAR(GETDATE()),MONTH(GETDATE()),1) AS BEGINNING_OF_MONTH

    ,CONVERT(DATE,GETDATE(),0) AS CURRENT_DATE_VALUE

    )

    SELECT

    TYD.TYD_ID

    ,TYD.TYD_DATE

    ,TYD.TYD_VALUE

    FROM dbo.TBL_TEST_YEARLY_DATA TYD

    CROSS APPLY CURRENT_DATES CD

    WHERE TYD.TYD_DATE BETWEEN CD.BEGINNING_OF_MONTH

    AND CD.CURRENT_DATE_VALUE;

    Output (same for both queries)

    TYD_ID TYD_DATE TYD_VALUE

    ----------- ---------- -----------

    366 2016-01-01 5696

    367 2016-01-02 1099

    368 2016-01-03 8739

    369 2016-01-04 7584

    370 2016-01-05 6431

    371 2016-01-06 8669

    372 2016-01-07 9044

    373 2016-01-08 1176

    374 2016-01-09 4395

    375 2016-01-10 7418

  • Eirikur Eiriksson (1/10/2016)


    Here is a quick example to help you get started.

    😎

    Note: do not make the mistake of using functions on the date column in the where clause, forces the server to do a full scan instead of seek.

    USE tempdb;

    GO

    SET NOCOUNT ON;

    -- SAMPLE DATA SET

    --/*

    IF OBJECT_ID(N'dbo.TBL_TEST_YEARLY_DATA') IS NOT NULL DROP TABLE dbo.TBL_TEST_YEARLY_DATA;

    CREATE TABLE dbo.TBL_TEST_YEARLY_DATA

    (

    TYD_ID INT IDENTITY(1,1) NOT NULL CONSTRAINT PK_DBO_TBL_TEST_YEARLY_DATA_TYD_ID PRIMARY KEY CLUSTERED

    ,TYD_DATE DATE NOT NULL

    ,TYD_VALUE INT NOT NULL CONSTRAINT DFLT_DBO_TBL_TEST_YEARLY_DATA_TYD_VALUE DEFAULT ((ABS(CHECKSUM(NEWID())) % 10000))

    );

    -- SAMPLE INDEX

    CREATE NONCLUSTERED INDEX NCLIDX_DBO_TBL_TEST_YEARLY_DATA_TYD_DATE ON dbo.TBL_TEST_YEARLY_DATA (TYD_DATE ASC) INCLUDE ( TYD_ID , TYD_VALUE );

    DECLARE @START_DATE DATE = CONVERT(DATE,'20150101',112);

    DECLARE @END_DATE DATE = CONVERT(DATE,'20161231',112);

    DECLARE @INTERVAL INT = 1;

    ;WITH T(N) AS (SELECT N FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) AS X(N))

    , NUMS(N) AS (SELECT TOP((DATEDIFF(DAY,@START_DATE,@END_DATE) / @INTERVAL ) +1) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS N

    FROM T T1,T T2,T T3,T T4,T T5,T T6)

    INSERT INTO dbo.TBL_TEST_YEARLY_DATA(TYD_DATE)

    SELECT

    DATEADD(DAY,(NM.N * @INTERVAL),@START_DATE)

    FROM NUMS NM ;

    -- */

    -- HARD CODED VALUES

    SELECT

    TYD.TYD_ID

    ,TYD.TYD_DATE

    ,TYD.TYD_VALUE

    FROM dbo.TBL_TEST_YEARLY_DATA TYD

    WHERE TYD.TYD_DATE >= CONVERT(DATE,'20160101',112)

    AND TYD.TYD_DATE <= CONVERT(DATE,'20160110',112);

    -- CTE RETURNING THE BEGINNING OF CURRENT MONTH AND CURRENT DATE

    ;WITH CURRENT_DATES AS

    (

    SELECT

    DATEFROMPARTS(YEAR(GETDATE()),MONTH(GETDATE()),1) AS BEGINNING_OF_MONTH

    ,CONVERT(DATE,GETDATE(),0) AS CURRENT_DATE_VALUE

    )

    SELECT

    TYD.TYD_ID

    ,TYD.TYD_DATE

    ,TYD.TYD_VALUE

    FROM dbo.TBL_TEST_YEARLY_DATA TYD

    CROSS APPLY CURRENT_DATES CD

    WHERE TYD.TYD_DATE BETWEEN CD.BEGINNING_OF_MONTH

    AND CD.CURRENT_DATE_VALUE;

    Output (same for both queries)

    TYD_ID TYD_DATE TYD_VALUE

    ----------- ---------- -----------

    366 2016-01-01 5696

    367 2016-01-02 1099

    368 2016-01-03 8739

    369 2016-01-04 7584

    370 2016-01-05 6431

    371 2016-01-06 8669

    372 2016-01-07 9044

    373 2016-01-08 1176

    374 2016-01-09 4395

    375 2016-01-10 7418

    Thank you but please explain, I could not understand the answer ?

    Meanwhile let me rephrase my question

    So let me rephrase the question

    We would take the full file (Dec file) , compare/ check it to prior records imported (Nov file) and ignore the duplicate records and then write out a new member import file which only contains records that have changed.

    I believe EXCEPT Operator should do the job ?

    Select * from Decfile data

    except

    Select * from Novfile data

  • So let me rephrase the question

    We would take the full file (Dec file) , compare/ check it to prior records imported (Nov file) and ignore the duplicate records and then write out a new member import file which only contains records that have changed.

    I believe EXCEPT Operator should do the job ?

    Select * from Decfile data

    except

    Select * from Novfile data

    for clarification please......

    will ANY of the records imported from Nov file have been altered when you get the Dec file?

    is there any unique identifier on the file that you receive?

    how many rows / columns in the file you receive?

    ________________________________________________________________
    you can lead a user to data....but you cannot make them think
    and remember....every day is a school day

  • First of all you need to provide the full and complete information, remember that we cannot see or know anything that's not posted here! Suggest that you post the DDL(create table) and sample data insert script along with the expected results and what you have tried so far.

    😎

  • J Livingston SQL (1/10/2016)


    So let me rephrase the question

    We would take the full file (Dec file) , compare/ check it to prior records imported (Nov file) and ignore the duplicate records and then write out a new member import file which only contains records that have changed.

    I believe EXCEPT Operator should do the job ?

    Select * from Decfile data

    except

    Select * from Novfile data

    for clarification please......

    will ANY of the records imported from Nov file have been altered when you get the Dec file? - [highlight=#ffff11]Yes the member file records are modified and that is already taken into account and loaded into DEC file, hope I answered you question[/highlight]

    is there any unique identifier on the file that you receive?

    how many rows / columns in the file you receive? [highlight=#ffff11]Yes there is a primary key and there are about 140 columns and 250k records[/highlight]

    Yes Good question see inline answers above

  • Yes Good question see inline answers above

    ???

    where are they? (help <grin>)

    edit...timing issue...sorry

    ________________________________________________________________
    you can lead a user to data....but you cannot make them think
    and remember....every day is a school day

  • will ANY of the records imported from Nov file have been altered when you get the Dec file? - Yes the member file records are modified and that is already taken into account and loaded into DEC file, hope I answered you question[/highlight]

    is there any unique identifier on the file that you receive?

    how many rows / columns in the file you receive? - Yes there is a primary key and there are about 140 columns and 250k records[/highlight]

  • Rankerg (1/10/2016)


    will ANY of the records imported from Nov file have been altered when you get the Dec file? - Yes the member file records are modified and that is already taken into account and loaded into DEC file, hope I answered you question[/highlight]

    is there any unique identifier on the file that you receive?

    how many rows / columns in the file you receive? - Yes there is a primary key and there are about 140 columns and 250k records[/highlight]

    one more question.......are there any columns that may give indication of last modified/insert date or that can help indicate change between nov file and dec file?

    ________________________________________________________________
    you can lead a user to data....but you cannot make them think
    and remember....every day is a school day

  • That is the question so basically we need to compare record-by-record and ignore the duplicates and then insert changed records into that new file that is the requirement

  • I am once more going to tell you that the question is too vague for an answer.

    In order for us to help you, you have to provide clear, concrete information. So if any tables are involved (I assume there are, otherwise you wouldn't post on this site), then post their schema, in the form of a CREATE TABLE statement that includes all the constraints.

    For any tables that contain data (imported data or existing data), provide us with sample data in the form of INSERT statements. Please test the CREATE TABLE and INSERT statements. The easier it is for us to recreate your problem, the more chance we are willing to help you - and copy, paste, execute is the easiest it gets.

    If external data sources are involved, tell us exactly what they look like. If for instance you import Excel sheets, then either attach a sample sheet, or post a screenshot of a sheet. Of course if you already imported the Excel data into a staging table, then simply give us that table (CREATE TABLE and INSERT statements) instead.

    For all the sample data posted, keep in mind that it has to be representative (i.e. all normal and special cases should be included), and as short as possible (so don't post your two-million row production database, but create a sample that illustrates the issue in ideally less than a dozen rows).

    Finally, post the expected results. For this a tabular format is often ideal (as a screenshot or use a fixed-width datatype, and check in preview mode that spaces are retained - use the code tag if needed). Also, remember that what's be obvious to you may not be obvious to others, so do add some explanation to help us understand how the sample input data results in the posted end result.


    Hugo Kornelis, SQL Server/Data Platform MVP (2006-2016)
    Visit my SQL Server blog: https://sqlserverfast.com/blog/
    SQL Server Execution Plan Reference: https://sqlserverfast.com/epr/

  • Rankerg (1/10/2016)


    Thank you but please explain, I could not understand the answer ?

    Meanwhile let me rephrase my question

    So let me rephrase the question

    We would take the full file (Dec file) , compare/ check it to prior records imported (Nov file) and ignore the duplicate records and then write out a new member import file which only contains records that have changed.

    I believe EXCEPT Operator should do the job ?

    Select * from Decfile data

    except

    Select * from Novfile data

    Echoing Hugo's post, the question is not clear enough, the problem is that we cannot see you're screen or know anything that hasn't been posted. The problem you have sounds trivial but this is not a game of guessing, this site is all about the practice of providing the best possible answer 10 times out of 10.

    😎

  • --- example only to fully understand the problem.

    --- expect some feedback to determine if this is representative of your problem

    --- if not then PLEASE provide necessary code to explain...based on format below.

    --- THANKS

    USE tempdb;

    SET NOCOUNT ON;

    IF OBJECT_ID('tempdb..Base_table', 'U') IS NOT NULL

    DROP TABLE tempdb..Base_table;

    IF OBJECT_ID('tempdb..Dec_file', 'U') IS NOT NULL

    DROP TABLE tempdb..Dec_file;

    IF OBJECT_ID('tempdb..#temp', 'U') IS NOT NULL

    DROP TABLE tempdb..#temp;

    -- lets assume that the Base_table below is as after the Nov file import

    CREATE TABLE Base_table(

    TranID INTEGER NOT NULL

    ,CustomerID INTEGER NOT NULL

    ,ProdID INTEGER NOT NULL

    ,SalesAmount NUMERIC(5,2) NOT NULL

    ,TransDate DATETIME NOT NULL

    );

    INSERT INTO Base_table(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (1,60634,5093,76.99,'2015-11-11 00:00:00.000');

    INSERT INTO Base_table(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (2,5755,6803,81.34,'2015-11-19 00:00:00.000');

    INSERT INTO Base_table(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (3,89722,5702,95.11,'2015-11-27 00:00:00.000');

    INSERT INTO Base_table(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (4,31744,1603,49.31,'2015-11-04 00:00:00.000');

    INSERT INTO Base_table(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (5,42486,939,60.25,'2015-11-28 00:00:00.000');

    INSERT INTO Base_table(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (6,5952,482,30.08,'2015-11-28 00:00:00.000');

    INSERT INTO Base_table(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (7,28933,769,43.93,'2015-11-15 00:00:00.000');

    INSERT INTO Base_table(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (8,7682,3371,79.29,'2015-11-22 00:00:00.000');

    INSERT INTO Base_table(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (9,58330,1777,77.69,'2015-11-01 00:00:00.000');

    INSERT INTO Base_table(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (10,31558,6910,45.30,'2015-11-12 00:00:00.000');

    -- lets assume that this the Dec import file you receive.....note two amnedments to exg rows and two additional rows

    CREATE TABLE Dec_file(

    TranID INTEGER NOT NULL

    ,CustomerID INTEGER NOT NULL

    ,ProdID INTEGER NOT NULL

    ,SalesAmount NUMERIC(5,2) NOT NULL

    ,TransDate DATETIME NOT NULL

    );

    INSERT INTO Dec_file(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (1,60634,5093,83.00,'2015-11-11 00:00:00.000'); -- amended

    INSERT INTO Dec_file(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (2,5755,6803,81.34,'2015-11-19 00:00:00.000');

    INSERT INTO Dec_file(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (3,89722,5702,95.11,'2015-11-27 00:00:00.000');

    INSERT INTO Dec_file(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (4,31744,1603,49.31,'2015-11-04 00:00:00.000');

    INSERT INTO Dec_file(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (5,42486,939,60.25,'2015-11-28 00:00:00.000');

    INSERT INTO Dec_file(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (6,5952,482,30.08,'2015-11-28 00:00:00.000');

    INSERT INTO Dec_file(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (7,28933,769,43.93,'2015-11-15 00:00:00.000');

    INSERT INTO Dec_file(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (8,7000,3371,79.29,'2015-11-22 00:00:00.000'); -- amended

    INSERT INTO Dec_file(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (9,58330,1777,77.69,'2015-11-01 00:00:00.000');

    INSERT INTO Dec_file(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (10,31558,6910,45.30,'2015-11-12 00:00:00.000');

    INSERT INTO Dec_file(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (11,3142,6000,15.30,'2015-12-12 00:00:00.000'); -- new

    INSERT INTO Dec_file(TranID,CustomerID,ProdID,SalesAmount,TransDate) VALUES (12,777,5755,12.68,'2015-12-01 00:00:00.000'); -- new

    -- do some work ansd check the results......indexes will be useful of course !

    SELECT * INTO #temp FROM (

    SELECT *

    FROM Dec_file

    EXCEPT

    SELECT *

    FROM Base_Table)x

    SELECT * FROM #temp -- see the differences

    --apply differences

    DELETE FROM Base_table

    FROM #temp

    INNER JOIN Base_table ON #temp.TranID = Base_table.TranID;

    INSERT INTO Base_table

    SELECT #temp.*

    FROM #temp;

    -- check for discerepancies

    SELECT *

    FROM Dec_file

    EXCEPT

    SELECT *

    FROM Base_Table;

    ________________________________________________________________
    you can lead a user to data....but you cannot make them think
    and remember....every day is a school day

  • oops did not see the second page

Viewing 15 posts - 1 through 15 (of 25 total)

You must be logged in to reply to this topic. Login to reply