For those interested, I loaded all the text from the file into a rawdata table, and then started parsing into two tables, setHeader (setDate, setYear), and then into setDetails, and linked up to setHeader. Here is the sql (made up as I went along):
-- create a rawdata table to import the raw data
if not exists (select * from sysobjects where id = OBJECT_ID('dbo.rawdata') and xtype = 'U')
create table dbo.rawdata (
raw_id int identity(1,1), -- used later to group up
rawdata varchar(256)
)
-- import the rawdata -- i used excel to write inserts. (see SQL Query 2)
-- also, i replaced ticks ("'") with "#" in the text file to make my life simpler.
select * from rawdata
DELETE rawdata where rtrim(rawdata) = '' -- get rid of empty lines
select * from rawdata
-- start parsing rawdata into tables.
-- create set table
create table setHeader (
setHeaderId int identity(1,1),
setDate datetime,
setYear int,
specialty varchar(255),
raw_id int,
rawtrim varchar(255),
rawdata varchar(255)
)
--drop table setHeader
-- add recs to set table. based on if there is a date at the front of text
insert into setHeader (
setDate,
rawtrim,
rawdata,
raw_id
)
select
LEFT(rawdata, charindex('-',rawdata) - 2),
REPLACE(replace(rawdata, 'It#s', ''), '!', ''),
rawdata,
raw_id
from
rawdata
where charindex('-', rawdata) > 0
and ISDATE(LEFT(rawdata, charindex('-',rawdata) - 2)) = 1
ORDER BY CONVERT(datetime, LEFT(rawdata, charindex('-',rawdata) - 2))
-- get rid of days, we can always figure that out from the date
UPDATE setHeader SET rawtrim = replace(rawtrim, 'Monday', '')
UPDATE setHeader SET rawtrim = replace(rawtrim, 'Tuesday', '')
UPDATE setHeader SET rawtrim = replace(rawtrim, 'Wednesday', '')
UPDATE setHeader SET rawtrim = replace(rawtrim, 'Thursday', '')
UPDATE setHeader SET rawtrim = replace(rawtrim, 'Friday', '')
UPDATE setHeader SET rawtrim = replace(rawtrim, 'Saturday', '')
UPDATE setHeader SET rawtrim = replace(rawtrim, 'Sunday', '')
UPDATE setHeader SET rawtrim = replace(rawtrim, 'Monday', '')
select * from setHeader
-- get years if numeric (not completely reliable)
update setHeader set setYear = RIGHT(rawtrim, 4)
where Isnumeric(RIGHT(rawtrim, 4)) = 1
-- if year is null, must be specialty. set specialty
select * from setHeader where setYear is null
update setHeader set specialty = ltrim(right(rawtrim, charindex('-', Reverse(rawtrim))-3))
from setHeader where setYear is null
--
select * from setHeader
-- okay, sets look good, move on to set entries
create table setEntries(
setEntriesId int identity(1,1),
setHeaderId int,
entryOrder int,
entryType char(1),
songOrder int,
song varchar(255),
artist varchar(255),
raw_id int,
rawtrim varchar(255),
rawdata varchar(255)
)
alter table setEntries
add bos bit
select * from setEntries
insert into setEntries (
rawdata,
raw_id
)
select
rawdata,
raw_id
from
rawdata
where charindex('-', rawdata) > 0
and ISDATE(LEFT(rawdata, charindex('-',rawdata) - 2)) < 1
-- attach entries to sets
update setEntries set setHeaderId = h.setHeaderId from (
select setHeaderId, raw_id, (select top 1 raw_id from setHeader s2 where s2.raw_id > sh.raw_id
order by s2.raw_id) raw_next
from setHeader sh
) h
where setEntries.raw_id between h.raw_id and isnull(h.raw_next, 9999)
-- lets parse the rawdata
select * from setEntries
where CHARINDEX(',', rawdata) > 0
-- fix 2 rows with , instead of period in numbering
update setEntries set rawdata = replace(rawdata, ',', '.') where raw_id in (161, 392)
--select
-- rawdata,
-- LEFT(rawdata, charindex('.', rawdata) - 1), --,
-- replace(ltrim(rtrim(SUBSTRING(rawdata, charindex('.', rawdata) + 1, charindex('-', rawdata) - 3))), '-', ''),
-- replace(right(rawdata, len(rawdata) - charindex('-', rawdata)), '#', '''')
--from setEntries
--where CHARINDEX('.', rawdata) >0
update setEntries set
entryType = 'S',
songOrder = LEFT(rawdata, charindex('.', rawdata) - 1),
song = replace(right(rawdata, len(rawdata) - charindex('-', rawdata)), '#', ''''),
artist = replace(ltrim(rtrim(SUBSTRING(rawdata, charindex('.', rawdata) + 1, charindex('-', rawdata) - 3))), '-', '')
where CHARINDEX('.', rawdata) >0
update setEntries set artist = REPLACE(artist, '#', '''')
-- deal with BOS
select * FROM setEntries where song like '%BOS%'
update setEntries set bos = 1 where song like '%BOS%'
update setEntries set song = REPLACE(song, '(BOS)', '')
update setEntries set song = REPLACE(song, '(2-way tie for BOS)', '')
update setEntries set song = REPLACE(song, '(3-way tie for BOS)', '')
update setEntries set song = REPLACE(song, '(4-way tie for BOS)', '')
update setEntries set song = REPLACE(song, '(three-way tie for BOS)', '')
update setEntries set song = REPLACE(song, '(two-way tie for BOS)', '')
update setEntries set bos = 0 where setEntriesId = 804
select * from setEntries
select song, artist, COUNT(*), MAX(setEntriesId) from setEntries
group by song, artist
order by count(*) desc, song, artist
--order by COUNT(*) desc
select MAX(setDate) from setHeader
select song, artist, COUNT(*) cnt, max(setDate) maxdate, MIN(setDate) mindate
from setEntries se
join setHeader sh on sh.setHeaderId = se.setHeaderId
where bos = 1
group by song, artist
order by COUNT(*) desc
select * from setEntries where setEntriesId = 256
Eminence Front (4-way tie for BOS)
Mr. Jones (two-way tie for BOS)
So Tired of Being Alone (three-way tie for BOS)
Hot in Herre (2-way tie for BOS)
Fake Plastic Trees (3-way tie for BOS)
select * from setHeader sh
join setEntries se on se.setHeaderId = sh.setHeaderId
order by sh.setDate, se.setHeaderId, se.raw_id
select setYear, COUNT(*) from setHeader
where setYear is not null
group by setYear
order by COUNT(*) desc
select * from setHeader where setYear = 977
righREVERSE
select charindex(' ',rawdata) from rawdata
where
8/30/13 - Friday! It#s the End Of Summer
select setYear, artist, COUNT(*)
from setHeader sh
join setEntries se on se.setHeaderId = sh.setHeaderId
group by setYear, artist
order by COUNT(*) desc
select artist, COUNT(*)
from setHeader sh
join setEntries se on se.setHeaderId = sh.setHeaderId
group by artist
order by COUNT(*) desc