Monday, October 28, 2013

Informatica Debugging - Setting the PMError Log

Recently I hit some transformation errors. By default these errors are swallowed by the Integration Engine. It does not try to write the row to target.It does not add row level details to the session log.

Looking at the session log only reveals a hint of what the error may be:

2013-09-03 09:10:38 : INFO : (11652 | TRANSF_1_1_1) : (IS | DOMAIN) : node01_SERVER : CMN_1053 : : Rowdata: ( RowType=-4(error) Src Rowid=854 Targ Rowid=854
  lkp_Key_(lkp_EDW_ID_Passthrough:Int:): "(NULL)"
  lkp_Start_DT (lkp_Start_DT:Date:): "11/27/2012 00:00:00.000000000"
)

There was no clear definition in Communities for this error. The sess log was not clear where it occured.
I had my QA person do the following to the session throwing the transformation error
Check out and open the session
Click Config Object
Scroll to Error Handling
Change Error Log Type - Flat File
Change Error Log File Directory to $PMLookupFileDir
Save and run the session
Undo the checkout...as this change makes the session run at a row by row level that can impact performance.

The following was produced.

Transformation||Transformation Mapplet Name||Transformation Group||Partition Index||Transformation Row ID||Error Sequence||Error Timestamp||Error UTC Time||Error Code||Error Message||Error Type||Transformation Data||Source Mapplet Name||Source Name||Source Row ID||Source Row Type||Source Data
FIL_Expire||N/A||Output||1||38||1||09/03/2013 16:34:19||137829999||11999||There is an error in the port [lkp_Key]: The default value for the port is set to: ERROR(Transformation error: FIL_Expire).||3||D:004sfsdfsdfs655E914DD833FF5CBAA88A|D:11/27/2012 00:00:00.000000000|D:1|D:0||N/A||SQ_Shortcut_to_SourceOfDimensionTbl123||38||0||D:0040484394655E914DasdasdasdA88A|D:12/13/2012 00:00:00.000000000|D:6384 XMJWRFS XY|N:|D:Dim1|D:Dim2|D:Dim3|D:Dim4

This was 100% more useful than the sess log, as I use that lkp port in several objects, it was now clear which one and which port was impacted.

Permissions Deployment Script

/*
    Thanks to Chris K.
    Edit the table variables' values for custom roles, users (+ their logins and tiers), and user-role mappings.
    Server logins must pre-exist.
*/

--CHANGE THIS: matches databse you are deploying to
USE DataBaser123

--CHANGE THIS: D = Dev, Q = QA , U = UAT , P = Prod, D = DR
DECLARE @tierCode CHAR(1) = 'D'

set nocount on;

-- define custom db roles
declare @customDbRole table (customDbRoleNm sysname not null primary key,
                             permission nvarchar(4000))
insert  @customDbRole (customDbRoleNm, permission)
values ('db_executor', N'grant execute to db_executor');                           


-- define db users
declare @dbUser table ( dbUserID int not null,      -- unique identifier for user that is consistent across tiers
                        tierCode char(1) not null,  -- [D|Q|U|P]
                        dbUserName sysname not null,
                        loginName sysname not null,
                        primary key ( dbUserId,
                                      tierCode ) )

insert @dbUser (dbUserID, tierCode, dbUserName, loginName)
values

  
    -- some users

    ( 1, 'D', 'domain\user11', 'domain\user11'),
    ( 1, 'Q', 'domain\user1, 'domain\user1'),
    ( 1, 'U', 'domain\user1D1', 'domain\user1D1'),
    ( 1, 'P', 'domain\user1', 'bubba'),  

     -- ssrs users
    ( 2, 'D', 'NW\ssrs_user_dev', 'NW\ssrs_user_dev'),
    ( 2, 'Q', 'NW\ssrs_user_qa',  'NW\ssrs_user_qa'),
    ( 2, 'U', 'NW\ssrs_user_uat', 'NW\ssrs_user_uat'),
    ( 2, 'P', 'NW\ssrs_user_prod','NW\ssrs_user_prod'),
  
    -- Informatica Service Accounts
    ( 3, 'D', 'informatica_read', 'informatica_read'),
    ( 3, 'Q', 'informatica_read', 'informatica_read'),
    ( 3, 'U', 'informatica_read', 'informatica_read'),
    ( 3, 'P', 'informatica_read', 'informatica_read'),
    ( 4, 'D', 'informatica_write', 'informatica_write'),
    ( 4, 'Q', 'informatica_write', 'informatica_write'),
    ( 4, 'U', 'informatica_write', 'informatica_write'),
    ( 4, 'P', 'informatica_write', 'informatica_write');


/*
define user-role mapping
ROLES: 'db_owner' 'db_datareader' 'db_datawriter' 'db_executor' 'db_ddladmin'
*/
declare @dbUserDbRole table ( dbUserID int not null,
                              dbRoleNm sysname not null,
                              primary key (dbUserId, dbRoleNm) )

insert @dbUserDbRole (dbUserID, dbRoleNm)
values (1, 'db_datareader'),
       (1, 'db_datawriter'),
       (1, 'db_executor'),
       (2, 'db_datareader'),     
       (3, 'db_datareader'),
       (3, 'db_executor'),
       (4, 'db_datareader'),
       (4, 'db_datawriter'),
       (4, 'db_executor');


------------------------
-- COMMON BLOCK BELOW --
------------------------

    -- derive at which tier permissions are being applied (this relies on using the registry setting or that the
    -- servername defining tier as the 2nd character)

DECLARE @outvar varchar(255)

if @outvar is not null
begin
    select  @tierCode = substring(@outvar,1,1);
end
else
begin
    select  @tierCode = case when @@servername like 'BV%'
                             then 'D'
                             else substring(@@servername,2,1)
                        end;
end;

declare @printStmt varchar(8000);
select  @printStmt = 'Applying permissions for ' +
    case when @tierCode = 'D' then 'Development'
         when @tierCode = 'Q' then 'QA'
         when @tierCode = 'U' then 'UAT'
         when @tierCode = 'P' then 'Production'
         else 'Uncertain'
    end;
print @printStmt;

-- output information about existing users that are untouched:
declare @untouchedUser table ( unTouchedUserNm sysname not null primary key)
insert @untouchedUser ( unTouchedUserNm )
select  princ.name
from sys.database_principals princ
where princ.type in (N'U',N'S')
  and name not in ('public','dbo','guest','information_schema','sys')
  and name not in (select dbUserName from @dbUser where tierCode = @tierCode);

while exists (select 1 from @untouchedUser)
begin
    declare @uun sysname;
    select top 1 @uun = unTouchedUserNm,
           @printStmt = 'The following user exists in the target but is not source controlled: ' +
                        unTouchedUserNm
    from @untouchedUser
    order by unTouchedUserNm;
  
    print @printStmt;
  
    delete @untouchedUser
    where unTouchedUserNm = @uun;
end;

-- apply permissions

declare @sql nvarchar(4000) = null,
        @rc int = null;

    -- custom roles
while exists (select 1 from @customDbRole)
begin
    declare @customDbRoleNm sysname, @permission nvarchar(4000);
    select top 1 @customDbRoleNm = customDbRoleNm, @permission = permission
    from @customDbRole
    order by customDbRoleNm;

    select @sql = N'if not exists (select 1 from sys.database_principals WHERE name = N''' + @customDbRoleNm + ''' and type = ''R'') create role [' + @customDbRoleNm + ']';
    exec @rc = sp_executeSql @sql;
    if @rc <> 0 or @@error <> 0
    begin
        raiserror('Error from executeSql on: %s',16,1,@sql);      
        return;
    end;
  
    print @sql;

    exec @rc = sp_executeSql @permission;
    if @rc <> 0 or @@error <> 0
    begin
        raiserror('Error from executeSql on: %s',16,1,@permission);      
        return;
    end;
  
    print @permission;

    delete @customDbRole
    where customDbRoleNm = @customDbRoleNm;
end

    -- users and role mapping
while exists (select 1 from @dbUser where tierCode = @tierCode)
begin
    declare @dbUserID int, @dbUserNm sysname, @loginNm sysname;
    select top 1 @dbUserId = dbUserId, @dbUserNm = dbUserName, @loginNm = loginName
    from @dbUser
    where tierCode = @tierCode
    order by dbUserId;

    select @sql = N'if not exists (select 1 from sys.database_principals WHERE name = N''' + @dbUserNm + ''') create user [' + @dbUserNm + '] for login [' + @loginNm + ']';
    exec @rc = sp_executeSql @sql;
    if @rc <> 0 or @@error <> 0
    begin
        raiserror('Error from executeSql on: %s',16,1,@sql);      
        return;
    end;
    print @sql;

    -- add user to roles
    while exists (select 1 from @dbUserDbRole where dbUserId = @dbUserId)
    begin
        declare @dbRoleNm sysname;
        select top 1 @dbRoleNm = dbRoleNm
        from @dbUserDbRole
        where dbUserId = @dbUserId
        order by dbRoleNm
      
        select @sql = N'exec sys.sp_addrolemember @rolename = N''' + @dbRoleNm + ''', @membername = N''' + @dbUserNm + '''';
        exec @rc = sp_executeSql @sql;
        if @rc <> 0 or @@error <> 0
        begin
            raiserror('Error from executeSql on: %s',16,1,@sql);      
            return;
        end;
        print @sql;      
          
        delete @dbUserDbRole
        where dbUserId = @dbUserId
          and dbRoleNm = @dbRoleNm
    end

    delete @dbUser
    where dbUserId = @dbUserID
      and tierCode = @tierCode;
end

Database Performance Testing Tip - Clear Cache and Buffers

Prior to testing each query set, call dbcc freeproccache and dbcc dropcleanbuffers.

Wednesday, October 16, 2013

Estimating Data Integration Projects

Not an original idea, more of a whole sale rip off of a post we found.



T = ((E * D * 5)+(S * C * 6)) * A + F
where
T = total no. of days to allow for implementation
E = no. of entities in physical model
D = data quality (D = 0.5 very clean, D = 1 medium cleanliness, D = 1.5 dirty, a lot of work)
S = no. of source systems
C = complexity of transformations (C = 1 relatively simple, C = 0.5 1 to 1 mapping, C = 2 complex, lookups required, derived data, calculations)
A = automation factor (A = 1 ETL tool used, A = 1.6 manual coding)
F = first time implementation, use of ETL (F = 0 many implementations done, F = 20 small & flexible, F = 50 large development)
this estimates have been tried and proven from my project management experience in projects implementing with Informatica as ETL tool.
try it and refine it at the end of each project.