作者:dwxa520恋歌_261 | 来源:互联网 | 2023-09-18 13:46
主要的 txt (data.txt) 包含例如:
Lib ID 4444
QT ID 4444
SOQ ID 80
MAC ID 21563
LED ID 4444
TRD ID 80
CAD ID 31256
OIL ID 21563
MNO ID 3315
TOP ID 638
这样的例子还有很多。数字的位数最少 2 位,最多 5 位。我想做的是把编号相同的行匹配出来,并分别保存到单独的 txt 文件中。文件可以用该编号命名,也可以用任意随机名称。例如,它会把以下 ID 保存到一个单独的 txt 文件(4444.txt 或 random.txt)中:
LED ID 4444
QT ID 4444
Lib ID 4444
它会把 ID 为 80 的行保存到另一个 txt 文件中:
TRD ID 80
SOQ ID 80
在不同的 txt 中保存另一个匹配项:
OIL ID 21563
MAC ID 21563
并且具有唯一编号的行将保存在不同的 txt 中,例如 (unique.txt):
MNO ID 3315
TOP ID 638
CAD ID 31256
我试过使用这个正则表达式:
(\d)(?!\1+$)\d*
它可以匹配到相同的数字,但我卡在了如何把它们分开保存这一步。任何帮助将不胜感激。
回答
$ cat tst.awk
# Group input lines by their third field.
# A key that occurs two or more times gets all of its lines written to
# "<key>.txt"; lines whose key occurs exactly once end up in "unique.txt".
{
    if ( !($3 in outfile) ) {
        # First sighting of this key: record the file name it would use and
        # hold the line back until we know whether the key repeats.
        outfile[$3] = $3 ".txt"
        pending[$3] = $0
        next
    }

    target = outfile[$3]
    if ( $3 in pending ) {
        # Second sighting: the key is not unique after all, so emit the
        # held-back first line. ">" starts the output file afresh.
        print pending[$3] > target
        delete pending[$3]
    }
    print >> target
    # Close after every write to keep the number of open files low.
    close(target)
}
END {
    # Anything still held back was seen exactly once.
    for (k in pending) {
        print pending[k] > "unique.txt"
    }
}
$ awk -f tst.awk file
$ head *.txt
==> 21563.txt <==
MAC ID 21563
OIL ID 21563
==> 4444.txt <==
Lib ID 4444
QT ID 4444
LED ID 4444
==> 80.txt <==
SOQ ID 80
TRD ID 80
==> unique.txt <==
TOP ID 638
MNO ID 3315
CAD ID 31256
回答
根据您给出的示例,请尝试以下方法。已在 GNU awk 中编写并测试。
awk '
## Two-pass split on the last field; the input file is named twice on the
## command line so that it is read twice.
## Pass 1 (FNR==NR is true only while the first file argument is read):
## count how many times each last-field value occurs.
FNR==NR{
  arr[$NF]++
  next
}
## Pass 2: a value that occurred exactly once goes to unique.txt.
arr[$NF]==1{
  print > ("unique.txt")
  next
}
## Pass 2: a value that occurred more than once goes to "<value>.txt".
arr[$NF]>1{
  outFile=$NF".txt"
  ## Truncate on the first write of this run so that output left over from
  ## an earlier run is replaced rather than appended to; append afterwards.
  if (started[outFile]++) {
    print >> (outFile)
  } else {
    print > (outFile)
  }
  ## Close right away to keep the number of open files low.
  close(outFile)
}
' Input_file Input_file
说明:为以上添加详细说明。
awk '                        ##Starting awk program from here.
FNR==NR{                     ##Checking condition which will be TRUE when Input_file is being read first time.
  arr[$NF]++                 ##Creating arr with index of last field and increasing it by 1 each time the same value comes.
  next                       ##next will skip all further statements from here.
}
arr[$NF]==1{                 ##Checking condition if any value(last field) occurs only 1 time in whole Input_file then do following.
  print > ("unique.txt")     ##Printing current line to unique.txt output file.
  next                       ##next will skip all further statements from here.
}
arr[$NF]>1{                  ##Checking condition if last field comes more than 1 time then do following.
  outFile=$NF".txt"          ##Creating outFile variable with last field and .txt appended to it.
  if (started[outFile]++) {  ##Checking if this output file was already written to during this run.
    print >> (outFile)       ##Appending current line to the output file.
  } else {
    print > (outFile)        ##First write of this run: truncating, so lines left by an earlier run are not kept.
  }
  close(outFile)             ##Closing output file in backend to avoid "too many opened files" error.
}
' Input_file Input_file      ##Mentioning Input_file(s) here, twice, so that the file is read two times.
回答
另一个 awk 方案,它会创建名为 1 ... n 的文件(n 即代码中的变量 f):
$ awk '{
    # Split lines by field 3. Repeated keys go to files named 1, 2, ...
    # in order of their first repeat; keys seen once go to one last file.
    if(!($3 in a) && !($3 in u)) {   # first occurrence of this key
        u[$3]=$0                     # hold the line until we know if the key repeats
        next
    }
    if($3 in u) {                    # u hash holds uniques
        a[$3]=++f                    # file naming happens here
        # This is the only place the numbered file is first opened, so use ">"
        # to truncate it: output left over from an earlier run is replaced
        # instead of being appended to.
        print u[$3] > a[$3]
        print > a[$3]
        close(a[$3])
        delete u[$3]                 # delete from unique hash when not unique anymore
        next
    }
    print >> a[$3]                   # third and later occurrences: append
    close(a[$3])
}
END {                                # in the end
    f++
    for(i in u)                      # print all uniques to last file
        print u[i] > f
}' file
回答
@ECHO OFF
SETLOCAL
rem Split the lines of a three-column text file into one file per distinct
rem third-column value, then gather every single-line result into oncers.txt.
rem The destination directory is assumed to be empty before the run.
rem
rem The following settings for the source directory, destination directory, target directory,
rem batch directory, filenames, output filename and temporary filename [if shown] are names
rem that I use for testing and deliberately include names which include spaces to make sure
rem that the process works using such names. These will need to be changed to suit your situation.
SET "sourcedir=u:\your files"
SET "destdir=u:\your results"
SET "filename1=%sourcedir%\q66304300.txt"
rem Append each source line to the file named after its third column, plus .txt,
rem inside the destination directory.
FOR /f "usebackqtokens=1,2,*delims= " %%u IN ("%filename1%") DO (ECHO %%u %%v %%w)>>"%destdir%\%%w.txt"
rem For every generated file, multiline stays undefined when nothing follows the
rem first line, meaning the file holds a single line. Such a file is appended to
rem oncers.txt and then deleted.
FOR /f %%e IN ('dir /b /a-d "%destdir%\*.txt"') DO (
 SET "multiline="
 FOR /f "usebackqskip=1" %%b IN ("%destdir%\%%e") DO SET "multiline=%%b"
 IF NOT DEFINED multiline TYPE "%destdir%\%%e">>"%destdir%\oncers.txt"&DEL "%destdir%\%%e"
)
GOTO :EOF
对源文件中的每一行,把 3 个列分别取到 %%u、%%v、%%w 中,然后把该行追加到目标目录(假定一开始为空)里以第三列命名的 .txt 文件(即 第三列.txt)中。
然后读取目标目录中的每个文件。如果某个文件只有 1 行,则把它的内容追加到目标目录中的 oncers.txt,并删除该文件。