Thursday, September 5, 2024

In Search of.... Duplicate indexes

 2 new queries... the first works with SQL 2017 & later, the second for older versions.

Query for newer releases:

WITH IncludedColumns AS (

SELECT i.object_id AS TableID, 

        I.index_id AS IndexID, 

STRING_AGG(c.name, ', ') WITHIN GROUP (ORDER BY c.name) AS Includes

FROM sys.index_columns ic

JOIN sys.indexes i

ON ic.object_id = i.object_id

AND ic.index_id = i.index_id

JOIN sys.columns c

ON c.object_id = ic.object_id

AND c.column_id = ic.column_id

WHERE ic.is_included_column = 1

GROUP BY i.object_id, i.index_id


), 

MyDuplicate AS (

    SELECT 

        Sch.[name] AS SchemaName, 

        Obj.[name] AS TableName, 

        Idx.[name] AS IndexName, 

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 1) AS Col1, 

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 2) AS Col2,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 3) AS Col3,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 4) AS Col4,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 5) AS Col5,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 6) AS Col6,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 7) AS Col7,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 8) AS Col8,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 9) AS Col9,

Idx.filter_definition AS Filter_definition,

ic.Includes,

Idx.is_primary_key,

Idx.is_unique,

Idx.is_unique_constraint


        -- Add more columns as needed

    FROM  

        sys.indexes Idx 

    INNER JOIN  

        sys.objects Obj ON Idx.[object_id] = Obj.[object_id]  

    INNER JOIN  

        sys.schemas Sch ON Sch.[schema_id] = Obj.[schema_id] 

LEFT OUTER JOIN IncludedColumns IC

ON Ic.TableID = Idx.object_id

AND Ic.IndexID = Idx.index_id

    WHERE  

        index_id > 0

AND Sch.name <> 'sys'

)

SELECT  

    MD1.SchemaName,  

    MD1.TableName,  

    MD1.IndexName, 

MD1.is_primary_key,

MD2.is_primary_key,

MD1.is_unique,

MD2.is_unique,

MD1.is_unique_constraint,

MD2.is_unique_constraint,

    MD2.IndexName AS OverLappingIndex, 

    MD1.Col1,  

    MD1.Col2,

    MD1.Col3,

    MD1.Col4,

    MD1.Col5,

    MD1.Col6,

    MD1.Col7,

    MD1.Col8,

    MD1.Col9,

MD1.Filter_definition AS F1,

MD2.Filter_definition AS F2,

MD1.Includes AS Includes1,

MD2.Includes AS Includes2,

'DROP INDEX ' +

MD2.IndexName +

' ON ' +

QUOTENAME(MD2.SchemaName) +

'.' +

QUOTENAME(MD2.TableName) + '

GO

' AS DropCommand 

    -- Add more columns as needed

FROM  

    MyDuplicate MD1 

INNER JOIN  

    MyDuplicate MD2 ON MD1.tablename = MD2.tablename 

                      AND MD1.indexname <> MD2.indexname

where md1.COL1 = md2.COL1

AND ISNULL(md1.col2, '') = ISNULL(md2.col2, '')

AND ISNULL(md1.col3, '') = ISNULL(md2.col3, '')

AND ISNULL(md1.col4, '') = ISNULL(md2.col4, '')

AND ISNULL(md1.col5, '') = ISNULL(md2.col5, '')

AND ISNULL(md1.col6, '') = ISNULL(md2.col6, '')

AND ISNULL(md1.col7, '') = ISNULL(md2.col7, '')

AND ISNULL(md1.col8, '') = ISNULL(md2.col8, '')

AND ISNULL(md1.col9, '') = ISNULL(md2.col9, '')

AND (ISNULL(md1.Includes, '') LIKE ISNULL(md2.Includes, '') + '%'

--OR

--ISNULL(md2.Includes, '') LIKE ISNULL(md1.Includes, '') + '%'

)

AND ISNULL(md1.Filter_definition, '') = ISNULL(md2.Filter_definition, '')

AND MD2.is_primary_key = 0

AND MD2.is_unique_constraint = 0

-- AND Md1.is_unique = Md2.is_unique -- Optional condition

ORDER BY SchemaName, TableName, IndexName, OverLappingIndex




Query for older releases

WITH IncludedColumns AS (

               SELECT i.object_id AS TableID,

                                                                I.index_id AS IndexID,

                --  STRING_AGG(c.name, ', ') WITHIN GROUP (ORDER BY c.name) AS Includes

                Stuff (( select ', '+ col2.name  as [text()]

                         from    sys.indexes ind2

                         inner join      sys.index_columns ic2  

                                                                                                                                on  ind2.object_id = ic2.object_id and ind2.index_id = ic2.index_id

                         inner join     sys.columns col2

                                                                                                                on    ic2.object_id = col2.object_id and ic2.column_id = col2.column_id 

                        where ind2.index_id = i.index_id

                        and  ind2.object_id = t.object_id

                        And  ic2.key_ordinal =0

                        order by ic2.Key_Ordinal

                        for XML path ('')),1,1,'')

                Includes

               FROM sys.index_columns ic

                              JOIN sys.indexes i

                                             ON ic.object_id = i.object_id

                                             AND ic.index_id = i.index_id

                              JOIN sys.columns c

                                             ON c.object_id = ic.object_id

                                             AND c.column_id = ic.column_id

                                                                                                join  sys.tables t on   i.object_id = t.object_id

               WHERE ic.is_included_column = 1

               GROUP BY i.object_id, i.index_id,t.object_id

 

),

MyDuplicate AS (

    SELECT 

        Sch.[name] AS SchemaName, 

        Obj.[name] AS TableName, 

        Idx.[name] AS IndexName, 

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 1) AS Col1, 

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 2) AS Col2,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 3) AS Col3,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 4) AS Col4,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 5) AS Col5,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 6) AS Col6,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 7) AS Col7,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 8) AS Col8,

        INDEX_COL(Sch.[name] + '.' + Obj.[name], Idx.index_id, 9) AS Col9,

Idx.filter_definition AS Filter_definition,

ic.Includes,

Idx.is_primary_key,

Idx.is_unique,

Idx.is_unique_constraint


        -- Add more columns as needed

    FROM  

        sys.indexes Idx 

    INNER JOIN  

        sys.objects Obj ON Idx.[object_id] = Obj.[object_id]  

    INNER JOIN  

        sys.schemas Sch ON Sch.[schema_id] = Obj.[schema_id] 

LEFT OUTER JOIN IncludedColumns IC

ON Ic.TableID = Idx.object_id

AND Ic.IndexID = Idx.index_id

    WHERE  

        index_id > 0

AND Sch.name <> 'sys'

)

SELECT  

    MD1.SchemaName,  

    MD1.TableName,  

    MD1.IndexName, 

MD1.is_primary_key,

MD2.is_primary_key,

MD1.is_unique,

MD2.is_unique,

MD1.is_unique_constraint,

MD2.is_unique_constraint,

    MD2.IndexName AS OverLappingIndex, 

    MD1.Col1,  

    MD1.Col2,

    MD1.Col3,

    MD1.Col4,

    MD1.Col5,

    MD1.Col6,

    MD1.Col7,

    MD1.Col8,

    MD1.Col9,

MD1.Filter_definition AS F1,

MD2.Filter_definition AS F2,

MD1.Includes AS Includes1,

MD2.Includes AS Includes2,

'DROP INDEX ' +

MD2.IndexName +

' ON ' +

QUOTENAME(MD2.SchemaName) +

'.' +

QUOTENAME(MD2.TableName) + '

GO

' AS DropCommand 

    -- Add more columns as needed

FROM  

    MyDuplicate MD1 

INNER JOIN  

    MyDuplicate MD2 ON MD1.tablename = MD2.tablename 

                      AND MD1.indexname <> MD2.indexname

where md1.COL1 = md2.COL1

AND ISNULL(md1.col2, '') = ISNULL(md2.col2, '')

AND ISNULL(md1.col3, '') = ISNULL(md2.col3, '')

AND ISNULL(md1.col4, '') = ISNULL(md2.col4, '')

AND ISNULL(md1.col5, '') = ISNULL(md2.col5, '')

AND ISNULL(md1.col6, '') = ISNULL(md2.col6, '')

AND ISNULL(md1.col7, '') = ISNULL(md2.col7, '')

AND ISNULL(md1.col8, '') = ISNULL(md2.col8, '')

AND ISNULL(md1.col9, '') = ISNULL(md2.col9, '')

AND (ISNULL(md1.Includes, '') LIKE ISNULL(md2.Includes, '') + '%'

--OR

--ISNULL(md2.Includes, '') LIKE ISNULL(md1.Includes, '') + '%'

)

AND ISNULL(md1.Filter_definition, '') = ISNULL(md2.Filter_definition, '')

AND MD2.is_primary_key = 0

AND MD2.is_unique_constraint = 0

-- AND Md1.is_unique = Md2.is_unique -- Optional condition

ORDER BY SchemaName, TableName, IndexName, OverLappingIndex 


Wednesday, March 22, 2023

Tuning 3rd-Party Applications

 

BLOG ENTRY – Tuning 3rd-party software products

 

The consensus amongst CIO-intelligentsia states that you shouldn’t buy a software tool that provides less than 95% of what you need – it’s cheaper to build.

To answer this, many 3rd-party products provide 500% of what you need, knowing that you’ll use the components of your application that you need and leave what you don’t use.

The problem with this is that with thousands of companies and tens- to hundreds of thousands of users, the product is used so many ways that it’s difficult for the vendor to tune for your environment. So, you have to tune your own databases in a way that does not change the logic of the application.

The way to do this is with SQL Server-based applications is to use a standard, “Black box” approach:

  1. Monitor the database environment (While we like SolarWinds DPA & SQL Sentry, there are many others on the market)
  2. Validate physical resources: CPU, IO, Network, etc.
    1. Do NOT make any hardware changes in the first iteration; this is your baseline. After that, you might consider more CPU or faster storage
  3. Use the database engine tuning advisor (DETA) to identify missing indexes (the easiest way to improve performance without changing the code or logic, which is likely to void a software warranty agreement)
    1. The easiest way to do this is with the missing index script in my BLOG entry, mssqlperformance.blogspot.com “Missing indexes” – make sure you read the notes about duplicate indexes
    2. While it is extraordinarily unlikely that adding an index will negatively affect performance (as opposed to making it much faster), some 3rd-party software providers don’t ‘have a sense of humor about it. We recommend a frank conversation with their support team.
    3. Before adding the indexes, create a script to roll the indexes back out of the database. While I haven’t needed to do this in well over a decade, better safe than sorry.
  4. Repeat

Saturday, March 6, 2021

Backup frequency

A backup strategy is NOT a technical decision, it is the result of applying technical expertise to business decisions regarding RPO (Recovery Point Objective) and RTO (Recovery Time Objective).

 

For example, if you are backing up a 5T database, it doesn’t really matter if you are backing up frequently if you can’t restore within your own defined SLA (Service Level Agreement). On top of that, you must (and usually will) need to decided what amount of data is a reasonable loss (if any). That is to say, if you are backing up your database daily, and taking incremental backups hourly, can you afford to lose the 30-minutes worth of data that will be lost, on average, should the servers crash.

 

In addition, you need to discuss the types of disaster you are backing up for. For example, if you don’t have an air-gapped backup, you may be paying ransomware at some point in your future.

 

For a very small (few hundred gig) system, you might  backup:

 

Full backups every 24 hours,

Log Backups every 15  minutes,

 

And realize that you have decided that a 7.5-minute loss is something that you live with if everything hits the fan.

 

If you have a multi-terabyte database, you may find that it takes hours to backup the “standard” way and that you need to use other tools, hardware, or perhaps add differential backups to the mix.

 

And in either case, make very sure that you have:

 

  1. Made sure that the backup is free of corruption
  2. Instructed the system  group to sweep the backups off the target disks
  3. Tested your restore process from the remote storage.

 

Backups are a 4-hour lecture; or a 30-minute white board session with executives. This is only a start.

Tuesday, May 26, 2020

What queries are running at the moment?

select
text
FROM
sys.dm_exec_requests er
INNER JOIN sys.sysprocesses sp ON er.session_id = sp.spid
CROSS APPLY sys.dm_exec_sql_text(er.sql_handle)as qt
WHERE
session_Id > 50 and -- exclude system stuff
text like '%create index%' -- find something we care about
and session_id <> @@spid -- exclude this session

Wednesday, February 19, 2020

When is an index redundant?


Recently I was called into a shop that had a 4 terabyte database, who claimed they didn’t have anywhere near that much data. Upon investigation, they were correct. They had about 100 gig of data, and about 3.9 T of indexes.

Is this too much? Well, in order to make that kind of judgement call there are several factors to consider:

  1. Are the indexes interfering with writes (i.e. is the number of indexes having a negative factor on insert / update performance)?
  2. Are the indexes actually being used?
  3. Are any indexes redundant?

The first one (write performance) can be identified by checking length of the write queues; almost any monitoring tool can get you this information.

The second can be identified with a script you can find on the web which counts the number of times an index has been accessed since the last time the server was booted. Of course, you want to the server to have been up long enough that the system tables are aware of weekend / month end processing. If you can’t find one readily, you can use the one I have on my blog:

Mssqlperformance.blogspot.com

The final one is the tool I used to reduce the database size for that customer from 4T to 300M without even doing “what’s being used” analysis… just by eliminating redundant indexes.

Many people find a script on the web (I have one in the blog mentioned above) which will look at the system tables and recommend missing indexes. The problem is, it will give you 6 similar indexes rather than one combined index. You have to do this yourself. If you automatically add all the indexes the server suggests… well, you can end up with 4T of indexes for 100G of data.

So how do we know what’s redundant?

Hopefully, you are old enough to remember a phone book. A phone book is an index on last name, first name, middle name. But what other indexes might that be used for? A lookup on just last name, first name could use the same phone book (and often does), as would a lookup just to find last name.

So if you have three indexes:

  1. Last name, first name, middle name
  2. Last name, first name
  3. Last name

You can visually look at these and KNOW that the last 2 are redundant because the first index can do the work that either of the second two could do.

As you start looking at data management view (DMV) information, you often get index recommendations along these lines:

  1. Create index idx on TableA (Column1)
  2. Create index idx on TableA (Column1) include (ColumnA, ColumnB)
  3. Create index idx on TableA (Column1, Column2) include (ColumnC, ColumnD)

Sometimes you’ll get 8 or 10 of these.

So, index 1 can easily be eliminated because index 2 covers it.
Index 2 can be combined into index 3 like this:

  1. Create index idx on TableA (Column1, Column2) include (ColumnA, ColumnB , ColumnC, ColumnD)

So… in summary, you can pull index recommendations from the DMVs, and they are good recommendations, but review them with an eye for detail before automatically applying them.

Wednesday, July 31, 2019

List indexes with keys by table

Note: Code borrowed from:

https://stackoverflow.com/questions/765867/list-of-all-index-index-columns-in-sql-server-db

... and modified to suit my needs.

SELECT
  schema_name(schema_id) as SchemaName, OBJECT_NAME(si.object_id) as TableName, si.name as IndexName,
  (CASE is_primary_key WHEN 1 THEN 'PK' ELSE '' END) as PK,
  (CASE is_unique WHEN 1 THEN '1' ELSE '0' END)+' '+
  (CASE si.type WHEN 1 THEN 'C' WHEN 3 THEN 'X' ELSE 'B' END)+' '+  -- B=basic, C=Clustered, X=XML
  (CASE INDEXKEY_PROPERTY(si.object_id,index_id,1,'IsDescending') WHEN 0 THEN 'A' WHEN 1 THEN 'D' ELSE '' END)+
  (CASE INDEXKEY_PROPERTY(si.object_id,index_id,2,'IsDescending') WHEN 0 THEN 'A' WHEN 1 THEN 'D' ELSE '' END)+
  (CASE INDEXKEY_PROPERTY(si.object_id,index_id,3,'IsDescending') WHEN 0 THEN 'A' WHEN 1 THEN 'D' ELSE '' END)+
  (CASE INDEXKEY_PROPERTY(si.object_id,index_id,4,'IsDescending') WHEN 0 THEN 'A' WHEN 1 THEN 'D' ELSE '' END)+
  (CASE INDEXKEY_PROPERTY(si.object_id,index_id,5,'IsDescending') WHEN 0 THEN 'A' WHEN 1 THEN 'D' ELSE '' END)+
  (CASE INDEXKEY_PROPERTY(si.object_id,index_id,6,'IsDescending') WHEN 0 THEN 'A' WHEN 1 THEN 'D' ELSE '' END)+
  '' as 'Type',
  INDEX_COL(schema_name(schema_id)+'.'+OBJECT_NAME(si.object_id),index_id,1) as Key1,
  INDEX_COL(schema_name(schema_id)+'.'+OBJECT_NAME(si.object_id),index_id,2) as Key2,
  INDEX_COL(schema_name(schema_id)+'.'+OBJECT_NAME(si.object_id),index_id,3) as Key3,
  INDEX_COL(schema_name(schema_id)+'.'+OBJECT_NAME(si.object_id),index_id,4) as Key4,
  INDEX_COL(schema_name(schema_id)+'.'+OBJECT_NAME(si.object_id),index_id,5) as Key5,
  INDEX_COL(schema_name(schema_id)+'.'+OBJECT_NAME(si.object_id),index_id,6) as Key6
FROM sys.indexes as si
LEFT JOIN sys.objects as so on so.object_id=si.object_id
WHERE index_id>0 -- omit the default heap
  and OBJECTPROPERTY(si.object_id,'IsMsShipped')=0 -- omit system tables
  and not (schema_name(schema_id)='dbo' and OBJECT_NAME(si.object_id)='sysdiagrams') -- omit sysdiagrams
ORDER BY SchemaName ,TableName , Key1, Key2, Key3, Key4, Key5, Key6

This version excludes tables with only one index:

SELECT

  schema_name(schema_id) as SchemaName, OBJECT_NAME(si.object_id) as TableName, si.name as IndexName,

  (CASE is_primary_key WHEN 1 THEN 'PK' ELSE '' END) as PK,

  (CASE is_unique WHEN 1 THEN '1' ELSE '0' END)+' '+

  (CASE si.type WHEN 1 THEN 'C' WHEN 3 THEN 'X' ELSE 'B' END)+' '+  -- B=basic, C=Clustered, X=XML

  (CASE INDEXKEY_PROPERTY(si.object_id,index_id,1,'IsDescending') WHEN 0 THEN 'A' WHEN 1 THEN 'D' ELSE '' END)+

  (CASE INDEXKEY_PROPERTY(si.object_id,index_id,2,'IsDescending') WHEN 0 THEN 'A' WHEN 1 THEN 'D' ELSE '' END)+

  (CASE INDEXKEY_PROPERTY(si.object_id,index_id,3,'IsDescending') WHEN 0 THEN 'A' WHEN 1 THEN 'D' ELSE '' END)+

  (CASE INDEXKEY_PROPERTY(si.object_id,index_id,4,'IsDescending') WHEN 0 THEN 'A' WHEN 1 THEN 'D' ELSE '' END)+

  (CASE INDEXKEY_PROPERTY(si.object_id,index_id,5,'IsDescending') WHEN 0 THEN 'A' WHEN 1 THEN 'D' ELSE '' END)+

  (CASE INDEXKEY_PROPERTY(si.object_id,index_id,6,'IsDescending') WHEN 0 THEN 'A' WHEN 1 THEN 'D' ELSE '' END)+

  '' as 'Type',

  INDEX_COL(schema_name(schema_id)+'.'+OBJECT_NAME(si.object_id),index_id,1) as Key1,

  INDEX_COL(schema_name(schema_id)+'.'+OBJECT_NAME(si.object_id),index_id,2) as Key2,

  INDEX_COL(schema_name(schema_id)+'.'+OBJECT_NAME(si.object_id),index_id,3) as Key3,

  INDEX_COL(schema_name(schema_id)+'.'+OBJECT_NAME(si.object_id),index_id,4) as Key4,

  INDEX_COL(schema_name(schema_id)+'.'+OBJECT_NAME(si.object_id),index_id,5) as Key5,

  INDEX_COL(schema_name(schema_id)+'.'+OBJECT_NAME(si.object_id),index_id,6) as Key6

FROM sys.indexes as si

LEFT JOIN sys.objects as so on so.object_id=si.object_id

WHERE index_id>0 -- omit the default heap

  and OBJECTPROPERTY(si.object_id,'IsMsShipped')=0 -- omit system tables

  and not (schema_name(schema_id)='dbo' and OBJECT_NAME(si.object_id)='sysdiagrams') -- omit sysdiagrams

  and so.object_id in (select object_id  from sys.indexes group by object_id having count(object_id) > 1)

ORDER BY SchemaName ,TableName , Key1, Key2, Key3, Key4, Key5, Key6




Tuesday, April 30, 2019

Excessive compile times

This code was brought to my attention by an associate, Valentin Iamandi, who mentioned it was from Jonathan Kehayias ... we have a client who may be victim of a SQL Server compilation bug (3 seconds compile time, 75ms execution time, this is to identify other potential victims.


-- Find high compile resource plans in the plan cache
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
WITH XMLNAMESPACES 
(DEFAULT 'http://schemas.microsoft.com/sqlserver/2004/07/showplan')
SELECT TOP 10
CompileTime_ms,
CompileCPU_ms,
CompileMemory_KB,
qs.execution_count,
qs.total_elapsed_time/1000 AS duration_ms,
qs.total_worker_time/1000 as cputime_ms,
(qs.total_elapsed_time/qs.execution_count)/1000 AS avg_duration_ms,
(qs.total_worker_time/qs.execution_count)/1000 AS avg_cputime_ms,
qs.max_elapsed_time/1000 AS max_duration_ms,
qs.max_worker_time/1000 AS max_cputime_ms,
SUBSTRING(st.text, (qs.statement_start_offset / 2) + 1,
(CASE qs.statement_end_offset
WHEN -1 THEN DATALENGTH(st.text)
ELSE qs.statement_end_offset
END - qs.statement_start_offset) / 2 + 1) AS StmtText,
query_hash,
query_plan_hash
FROM
(
SELECT 
c.value('xs:hexBinary(substring((@QueryHash)[1],3))', 'varbinary(max)') AS QueryHash,
c.value('xs:hexBinary(substring((@QueryPlanHash)[1],3))', 'varbinary(max)') AS QueryPlanHash,
c.value('(QueryPlan/@CompileTime)[1]', 'int') AS CompileTime_ms,
c.value('(QueryPlan/@CompileCPU)[1]', 'int') AS CompileCPU_ms,
c.value('(QueryPlan/@CompileMemory)[1]', 'int') AS CompileMemory_KB,
qp.query_plan
FROM sys.dm_exec_cached_plans AS cp
CROSS APPLY sys.dm_exec_query_plan(cp.plan_handle) AS qp
CROSS APPLY qp.query_plan.nodes('ShowPlanXML/BatchSequence/Batch/Statements/StmtSimple') AS n(c)
) AS tab
JOIN sys.dm_exec_query_stats AS qs
ON tab.QueryHash = qs.query_hash
CROSS APPLY sys.dm_exec_sql_text(qs.sql_handle) AS st
ORDER BY CompileTime_ms DESC
OPTION(RECOMPILE, MAXDOP 1);